diff options
934 files changed, 32026 insertions, 15686 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 71a74260ab..80c95a08bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,6 +193,9 @@ if( LLVM_USE_OPROFILE ) endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) endif( LLVM_USE_OPROFILE ) +set(LLVM_USE_SANITIZER "" CACHE STRING + "Define the sanitizer used to build binaries and tests.") + # Define an option controlling whether we should build for 32-bit on 64-bit # platforms, where supported. if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) diff --git a/CREDITS.TXT b/CREDITS.TXT index 26733abb81..e89f19e794 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -423,7 +423,6 @@ D: Thread Local Storage implementation N: Bill Wendling E: wendling@apple.com -D: Exception handling D: Bunches of stuff N: Bob Wilson diff --git a/Makefile.config.in b/Makefile.config.in index d3b57e9f10..26e3709fee 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -375,8 +375,6 @@ COVERED_SWITCH_DEFAULT = @COVERED_SWITCH_DEFAULT@ NO_UNINITIALIZED = @NO_UNINITIALIZED@ # -Wno-maybe-uninitialized NO_MAYBE_UNINITIALIZED = @NO_MAYBE_UNINITIALIZED@ -# -Wno-nested-anon-types -NO_NESTED_ANON_TYPES = @NO_NESTED_ANON_TYPES@ # Was polly found in tools/polly? LLVM_HAS_POLLY = @LLVM_HAS_POLLY@ diff --git a/Makefile.rules b/Makefile.rules index 63e9d9be2b..2c834aac63 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -667,7 +667,7 @@ LD.Flags += $(EXTRA_LD_OPTIONS) endif ifndef NO_PEDANTIC -CompileCommonOpts += -pedantic -Wno-long-long $(NO_NESTED_ANON_TYPES) +CompileCommonOpts += -pedantic -Wno-long-long endif CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \ $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT) \ diff --git a/autoconf/configure.ac b/autoconf/configure.ac index b8f1766228..0097db3d68 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -1270,7 +1270,6 @@ dnl Check optional compiler flags. AC_MSG_CHECKING([optional compiler flags]) CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros]) CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers]) -CXX_FLAG_CHECK(NO_NESTED_ANON_TYPES, [-Wno-nested-anon-types]) CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default]) dnl GCC's potential uninitialized use analysis is weak and presents lots of @@ -1288,7 +1287,7 @@ then CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized]) fi fi -AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $NO_NESTED_ANON_TYPES $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED]) +AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED]) AC_ARG_WITH([python], [AS_HELP_STRING([--with-python], [path to python])], @@ -1494,6 +1493,7 @@ AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h]) AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h]) AC_CHECK_HEADERS([valgrind/valgrind.h]) AC_CHECK_HEADERS([fenv.h]) +AC_CHECK_DECLS([FE_ALL_EXCEPT, FE_INEXACT], [], [], [[#include <fenv.h>]]) if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then AC_CHECK_HEADERS(pthread.h, AC_SUBST(HAVE_PTHREAD, 1), diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 7496622542..7cad190c11 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -80,6 +80,9 @@ check_include_file(utime.h HAVE_UTIME_H) check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H) check_include_file(windows.h HAVE_WINDOWS_H) check_include_file(fenv.h HAVE_FENV_H) +check_symbol_exists(FE_ALL_EXCEPT "fenv.h" HAVE_DECL_FE_ALL_EXCEPT) +check_symbol_exists(FE_INEXACT "fenv.h" HAVE_DECL_FE_INEXACT) + check_include_file(mach/mach.h HAVE_MACH_MACH_H) check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H) diff --git a/cmake/modules/GetSVN.cmake b/cmake/modules/GetSVN.cmake new file mode 100644 index 0000000000..acccc12a94 --- /dev/null +++ b/cmake/modules/GetSVN.cmake @@ -0,0 +1,25 @@ +# CMake project that writes Subversion revision information to a header. +# +# Input variables: +# FIRST_SOURCE_DIR - First source directory +# FIRST_REPOSITORY - The macro to define to the first revision number. +# SECOND_SOURCE_DIR - Second source directory +# SECOND_REPOSITORY - The macro to define to the second revision number. +# HEADER_FILE - The header file to write +include(FindSubversion) +if (Subversion_FOUND AND EXISTS "${FIRST_SOURCE_DIR}/.svn") + # Repository information for the first repository. + Subversion_WC_INFO(${FIRST_SOURCE_DIR} MY) + file(WRITE ${HEADER_FILE}.txt "#define ${FIRST_REPOSITORY} \"${MY_WC_REVISION}\"\n") + + # Repository information for the second repository. + if (EXISTS "${SECOND_SOURCE_DIR}/.svn") + Subversion_WC_INFO(${SECOND_SOURCE_DIR} MY) + file(APPEND ${HEADER_FILE}.txt + "#define ${SECOND_REPOSITORY} \"${MY_WC_REVISION}\"\n") + endif () + + # Copy the file only if it has changed. + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${HEADER_FILE}.txt ${HEADER_FILE}) +endif() diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index a83be41b22..4e59a3e3d9 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -59,6 +59,39 @@ else(WIN32) endif(UNIX) endif(WIN32) +function(add_flag_or_print_warning flag) + check_c_compiler_flag(${flag} C_SUPPORTS_FLAG) + check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG) + if (C_SUPPORTS_FLAG AND CXX_SUPPORTS_FLAG) + message(STATUS "Building with ${flag}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + else() + message(WARNING "${flag} is not supported.") + endif() +endfunction() + +function(append value) + foreach(variable ${ARGN}) + set(${variable} "${${variable}} ${value}" PARENT_SCOPE) + endforeach(variable) +endfunction() + +function(append_if condition value) + if (${condition}) + foreach(variable ${ARGN}) + set(${variable} "${${variable}} ${value}" PARENT_SCOPE) + endforeach(variable) + endif() +endfunction() + +macro(add_flag_if_supported flag) + check_c_compiler_flag(${flag} C_SUPPORTS_FLAG) + append_if(C_SUPPORTS_FLAG "${flag}" CMAKE_C_FLAGS) + check_cxx_compiler_flag(${flag} CXX_SUPPORTS_FLAG) + append_if(CXX_SUPPORTS_FLAG "${flag}" CMAKE_CXX_FLAGS) +endmacro() + if( LLVM_ENABLE_PIC ) if( XCODE ) # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't @@ -67,23 +100,14 @@ if( LLVM_ENABLE_PIC ) elseif( WIN32 OR CYGWIN) # On Windows all code is PIC. MinGW warns if -fPIC is used. else() - check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG) - if( SUPPORTS_FPIC_FLAG ) - message(STATUS "Building with -fPIC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") - else( SUPPORTS_FPIC_FLAG ) - message(WARNING "-fPIC not supported.") - endif() + add_flag_or_print_warning("-fPIC") if( WIN32 OR CYGWIN) # MinGW warns if -fvisibility-inlines-hidden is used. else() check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) - if( SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG ) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") - endif() - endif() + append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) + endif() endif() endif() @@ -177,7 +201,7 @@ if( MSVC ) endif (LLVM_ENABLE_WERROR) elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) if (LLVM_ENABLE_WARNINGS) - add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings ) + append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) # Turn off missing field initializer warnings for gcc to avoid noise from # false positives with empty {}. Turn them on otherwise (they're off by @@ -185,39 +209,59 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG) if (CMAKE_COMPILER_IS_GNUCXX) - add_llvm_definitions( -Wno-missing-field-initializers ) + append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) else() - add_llvm_definitions( -Wmissing-field-initializers ) + append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif() endif() - if (LLVM_ENABLE_PEDANTIC) - add_llvm_definitions( -pedantic -Wno-long-long ) - check_cxx_compiler_flag("-Werror -Wnested-anon-types" CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG) - if( CXX_SUPPORTS_NO_NESTED_ANON_TYPES_FLAG ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-nested-anon-types" ) - endif() - endif (LLVM_ENABLE_PEDANTIC) + append_if(LLVM_ENABLE_PEDANTIC "-pedantic -Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) check_cxx_compiler_flag("-Werror -Wcovered-switch-default" CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG) - if( CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wcovered-switch-default" ) - endif() + append_if(CXX_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_CXX_FLAGS) check_c_compiler_flag("-Werror -Wcovered-switch-default" C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG) - if( C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG ) - set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wcovered-switch-default" ) - endif() - if (USE_NO_UNINITIALIZED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-uninitialized") - endif() - if (USE_NO_MAYBE_UNINITIALIZED) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-maybe-uninitialized") - endif() + append_if(C_SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG "-Wcovered-switch-default" CMAKE_C_FLAGS) + append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) + append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) + check_cxx_compiler_flag("-Werror -Wnon-virtual-dtor" CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG) + append_if(CXX_SUPPORTS_NON_VIRTUAL_DTOR_FLAG "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) endif (LLVM_ENABLE_WARNINGS) if (LLVM_ENABLE_WERROR) add_llvm_definitions( -Werror ) endif (LLVM_ENABLE_WERROR) endif( MSVC ) +macro(append_common_sanitizer_flags) + # Append -fno-omit-frame-pointer and turn on debug info to get better + # stack traces. + add_flag_if_supported("-fno-omit-frame-pointer") + if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND + NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO") + add_flag_if_supported("-gline-tables-only") + endif() +endmacro() + +# Turn on sanitizers if necessary. +if(LLVM_USE_SANITIZER) + if (LLVM_ON_UNIX) + if (LLVM_USE_SANITIZER STREQUAL "Address") + append_common_sanitizer_flags() + add_flag_or_print_warning("-fsanitize=address") + elseif (LLVM_USE_SANITIZER MATCHES "Memory(WithOrigins)?") + append_common_sanitizer_flags() + add_flag_or_print_warning("-fsanitize=memory") + # -pie is required for MSan. + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie") + if(LLVM_USE_SANITIZER STREQUAL "MemoryWithOrigins") + add_flag_or_print_warning("-fsanitize-memory-track-origins") + endif() + else() + message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") + endif() + else() + message(WARNING "LLVM_USE_SANITIZER is not supported on this platform.") + endif() +endif() + add_llvm_definitions( -D__STDC_CONSTANT_MACROS ) add_llvm_definitions( -D__STDC_FORMAT_MACROS ) add_llvm_definitions( -D__STDC_LIMIT_MACROS ) @@ -766,7 +766,6 @@ CONVENIENCE_LTDL_FALSE LIBADD_DL NO_VARIADIC_MACROS NO_MISSING_FIELD_INITIALIZERS -NO_NESTED_ANON_TYPES COVERED_SWITCH_DEFAULT NO_MAYBE_UNINITIALIZED NO_UNINITIALIZED @@ -10498,7 +10497,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 10501 "configure" +#line 10500 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -12265,8 +12264,6 @@ NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2 NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` -NO_NESTED_ANON_TYPES=`$CXX -Werror -Wnested-anon-types -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-nested-anon-types` - COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` @@ -12282,8 +12279,8 @@ then fi fi -{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $NO_NESTED_ANON_TYPES $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&5 -echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $NO_NESTED_ANON_TYPES $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&6; } +{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&5 +echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED" >&6; } # Check whether --with-python was given. @@ -15947,6 +15944,178 @@ fi done +{ echo "$as_me:$LINENO: checking whether FE_ALL_EXCEPT is declared" >&5 +echo $ECHO_N "checking whether FE_ALL_EXCEPT is declared... $ECHO_C" >&6; } +if test "${ac_cv_have_decl_FE_ALL_EXCEPT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <fenv.h> + +int +main () +{ +#ifndef FE_ALL_EXCEPT + char *p = (char *) FE_ALL_EXCEPT; + return !p; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_FE_ALL_EXCEPT=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_have_decl_FE_ALL_EXCEPT=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_ALL_EXCEPT" >&5 +echo "${ECHO_T}$ac_cv_have_decl_FE_ALL_EXCEPT" >&6; } +if test $ac_cv_have_decl_FE_ALL_EXCEPT = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_ALL_EXCEPT 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_ALL_EXCEPT 0 +_ACEOF + + +fi +{ echo "$as_me:$LINENO: checking whether FE_INEXACT is declared" >&5 +echo $ECHO_N "checking whether FE_INEXACT is declared... $ECHO_C" >&6; } +if test "${ac_cv_have_decl_FE_INEXACT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <fenv.h> + +int +main () +{ +#ifndef FE_INEXACT + char *p = (char *) FE_INEXACT; + return !p; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_FE_INEXACT=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_have_decl_FE_INEXACT=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_FE_INEXACT" >&5 +echo "${ECHO_T}$ac_cv_have_decl_FE_INEXACT" >&6; } +if test $ac_cv_have_decl_FE_INEXACT = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_INEXACT 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_FE_INEXACT 0 +_ACEOF + + +fi + + if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then for ac_header in pthread.h @@ -22585,7 +22754,6 @@ CONVENIENCE_LTDL_FALSE!$CONVENIENCE_LTDL_FALSE$ac_delim LIBADD_DL!$LIBADD_DL$ac_delim NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim -NO_NESTED_ANON_TYPES!$NO_NESTED_ANON_TYPES$ac_delim COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim NO_MAYBE_UNINITIALIZED!$NO_MAYBE_UNINITIALIZED$ac_delim NO_UNINITIALIZED!$NO_UNINITIALIZED$ac_delim @@ -22615,6 +22783,7 @@ BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim ALL_BINDINGS!$ALL_BINDINGS$ac_delim OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim +RPATH!$RPATH$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -22656,14 +22825,13 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF -RPATH!$RPATH$ac_delim RDYNAMIC!$RDYNAMIC$ac_delim program_prefix!$program_prefix$ac_delim LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 5; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 4; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 diff --git a/docs/CMake.rst b/docs/CMake.rst index 6eab04b970..fb081d7b98 100644 --- a/docs/CMake.rst +++ b/docs/CMake.rst @@ -204,7 +204,7 @@ LLVM-specific variables tests. **LLVM_APPEND_VC_REV**:BOOL - Append version control revision info (svn revision number or git revision id) + Append version control revision info (svn revision number or Git revision id) to LLVM version string (stored in the PACKAGE_VERSION macro). For this to work cmake must be invoked before the build. Defaults to OFF. diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index b5d4180974..75415ab9cc 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -1038,6 +1038,24 @@ for your target. It has the following strengths: are used to manipulate the input immediate (in this case, take the high or low 16-bits of the immediate). +* When using the 'Pat' class to map a pattern to an instruction that has one + or more complex operands (like e.g. `X86 addressing mode`_), the pattern may + either specify the operand as a whole using a ``ComplexPattern``, or else it + may specify the components of the complex operand separately. The latter is + done e.g. for pre-increment instructions by the PowerPC back end: + + :: + + def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; + + def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>; + + Here, the pair of ``ptroff`` and ``ptrreg`` operands is matched onto the + complex operand ``dst`` of class ``memri`` in the ``STWU`` instruction. + * While the system does automate a lot, it still allows you to write custom C++ code to match special cases if there is something that is hard to express. diff --git a/docs/CommandGuide/llvm-link.rst b/docs/CommandGuide/llvm-link.rst index e4f2228841..3bcfa68c25 100644 --- a/docs/CommandGuide/llvm-link.rst +++ b/docs/CommandGuide/llvm-link.rst @@ -1,5 +1,5 @@ -llvm-link - LLVM linker -======================= +llvm-link - LLVM bitcode linker +=============================== SYNOPSIS -------- @@ -13,23 +13,9 @@ DESCRIPTION into a single LLVM bitcode file. It writes the output file to standard output, unless the :option:`-o` option is used to specify a filename. -:program:`llvm-link` attempts to load the input files from the current -directory. If that fails, it looks for each file in each of the directories -specified by the :option:`-L` options on the command line. The library search -paths are global; each one is searched for every input file if necessary. The -directories are searched in the order they were specified on the command line. - OPTIONS ------- -.. option:: -L directory - - Add the specified ``directory`` to the library search path. When looking for - libraries, :program:`llvm-link` will look in path name for libraries. This - option can be specified multiple times; :program:`llvm-link` will search - inside these directories in the order in which they were specified on the - command line. - .. option:: -f Enable binary output on terminals. Normally, :program:`llvm-link` will refuse @@ -48,8 +34,8 @@ OPTIONS .. option:: -d - If specified, :program:`llvm-link` prints a human-readable version of the output - bitcode file to standard error. + If specified, :program:`llvm-link` prints a human-readable version of the + output bitcode file to standard error. .. option:: -help @@ -67,8 +53,4 @@ EXIT STATUS If :program:`llvm-link` succeeds, it will exit with 0. Otherwise, if an error occurs, it will exit with a non-zero value. -SEE ALSO --------- - -gccld diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 87add670af..681777c12d 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -107,6 +107,12 @@ OS X * `Mach-O Runtime Architecture <http://developer.apple.com/documentation/Darwin/RuntimeArchitecture-date.html>`_ * `Notes on Mach-O ABI <http://www.unsanity.org/archives/000044.php>`_ +NVPTX +===== + +* `CUDA Documentation <http://docs.nvidia.com/cuda/index.html>`_ includes the PTX + ISA and Driver API documentation + Miscellaneous Resources ======================= diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index 539c75e2d7..0bbbafc6e6 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -521,13 +521,13 @@ By placing it in the ``llvm/projects``, it will be automatically configured by the LLVM configure script as well as automatically updated when you run ``svn update``. -GIT mirror +Git Mirror ---------- -GIT mirrors are available for a number of LLVM subprojects. These mirrors sync +Git mirrors are available for a number of LLVM subprojects. These mirrors sync automatically with each Subversion commit and contain all necessary git-svn marks (so, you can recreate git-svn metadata locally). Note that right now -mirrors reflect only ``trunk`` for each project. You can do the read-only GIT +mirrors reflect only ``trunk`` for each project. You can do the read-only Git clone of LLVM via: .. code-block:: console @@ -538,10 +538,23 @@ If you want to check out clang too, run: .. code-block:: console - % git clone http://llvm.org/git/llvm.git % cd llvm/tools % git clone http://llvm.org/git/clang.git +If you want to check out compiler-rt too, run: + +.. code-block:: console + + % cd llvm/projects + % git clone http://llvm.org/git/compiler-rt.git + +If you want to check out the Test Suite Source Code (optional), run: + +.. code-block:: console + + % cd llvm/projects + % git clone http://llvm.org/git/test-suite.git + Since the upstream repository is in Subversion, you should use ``git pull --rebase`` instead of ``git pull`` to avoid generating a non-linear history in your clone. To configure ``git pull`` to pass ``--rebase`` by default on the @@ -626,8 +639,10 @@ To set up clone from which you can submit code using ``git-svn``, run: % git config svn-remote.svn.fetch :refs/remotes/origin/master % git svn rebase -l +Likewise for compiler-rt and test-suite. + To update this clone without generating git-svn tags that conflict with the -upstream git repo, run: +upstream Git repo, run: .. code-block:: console @@ -638,12 +653,14 @@ upstream git repo, run: git checkout master && git svn rebase -l) +Likewise for compiler-rt and test-suite. + This leaves your working directories on their master branches, so you'll need to ``checkout`` each working branch individually and ``rebase`` it on top of its parent branch. For those who wish to be able to update an llvm repo in a simpler fashion, -consider placing the following git script in your path under the name +consider placing the following Git script in your path under the name ``git-svnup``: .. code-block:: bash @@ -991,7 +1008,7 @@ Optional Configuration Items ---------------------------- If you're running on a Linux system that supports the `binfmt_misc -<http://www.tat.physik.uni-tuebingen.de/~rguenth/linux/binfmt_misc.html>`_ +<http://en.wikipedia.org/wiki/binfmt_misc>`_ module, and you have root access on the system, you can set your system up to execute LLVM bitcode files directly. To do this, use commands like this (the first command may not be required if you are already using the module): diff --git a/docs/HowToSetUpLLVMStyleRTTI.rst b/docs/HowToSetUpLLVMStyleRTTI.rst index b906b25621..e0f865a141 100644 --- a/docs/HowToSetUpLLVMStyleRTTI.rst +++ b/docs/HowToSetUpLLVMStyleRTTI.rst @@ -295,6 +295,78 @@ ordering right:: | OtherSpecialSquare | Circle +A Bug to be Aware Of +-------------------- + +The example just given opens the door to bugs where the ``classof``\s are +not updated to match the ``Kind`` enum when adding (or removing) classes to +(from) the hierarchy. + +Continuing the example above, suppose we add a ``SomewhatSpecialSquare`` as +a subclass of ``Square``, and update the ``ShapeKind`` enum like so: + +.. code-block:: c++ + + enum ShapeKind { + SK_Square, + SK_SpecialSquare, + SK_OtherSpecialSquare, + + SK_SomewhatSpecialSquare, + SK_Circle + } + +Now, suppose that we forget to update ``Square::classof()``, so it still +looks like: + +.. code-block:: c++ + + static bool classof(const Shape *S) { + // BUG: Returns false when S->getKind() == SK_SomewhatSpecialSquare, + // even though SomewhatSpecialSquare "is a" Square. + return S->getKind() >= SK_Square && + S->getKind() <= SK_OtherSpecialSquare; + } + +As the comment indicates, this code contains a bug. A straightforward and +non-clever way to avoid this is to introduce an explicit ``SK_LastSquare`` +entry in the enum when adding the first subclass(es). For example, we could +rewrite the example at the beginning of `Concrete Bases and Deeper +Hierarchies`_ as: + +.. code-block:: c++ + + enum ShapeKind { + SK_Square, + + SK_SpecialSquare, + + SK_OtherSpecialSquare, + + SK_LastSquare, + SK_Circle + } + ... + // Square::classof() + - static bool classof(const Shape *S) { + - return S->getKind() == SK_Square; + - } + + static bool classof(const Shape *S) { + + return S->getKind() >= SK_Square && + + S->getKind() <= SK_LastSquare; + + } + +Then, adding new subclasses is easy: + +.. code-block:: c++ + + enum ShapeKind { + SK_Square, + SK_SpecialSquare, + SK_OtherSpecialSquare, + + SK_SomewhatSpecialSquare, + SK_LastSquare, + SK_Circle + } + +Notice that ``Square::classof`` does not need to be changed. + .. _classof-contract: The Contract of ``classof`` diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 03004f66df..659f02afb9 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -8342,6 +8342,46 @@ strings. This can be useful for special purpose optimizations that want to look for these annotations. These have no other defined use; they are ignored by code generation and optimization. +'``llvm.ptr.annotation.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use '``llvm.ptr.annotation``' on a +pointer to an integer of any width. *NOTE* you must specify an address space for +the pointer. The identifier for the default address space is the integer +'``0``'. + +:: + + declare i8* @llvm.ptr.annotation.p<address space>i8(i8* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i16* @llvm.ptr.annotation.p<address space>i16(i16* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i32* @llvm.ptr.annotation.p<address space>i32(i32* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i64* @llvm.ptr.annotation.p<address space>i64(i64* <val>, i8* <str>, i8* <str>, i32 <int>) + declare i256* @llvm.ptr.annotation.p<address space>i256(i256* <val>, i8* <str>, i8* <str>, i32 <int>) + +Overview: +""""""""" + +The '``llvm.ptr.annotation``' intrinsic. + +Arguments: +"""""""""" + +The first argument is a pointer to an integer value of arbitrary bitwidth +(result of some expression), the second is a pointer to a global string, the +third is a pointer to a global string which is the source file name, and the +last argument is the line number. It returns the value of the first argument. + +Semantics: +"""""""""" + +This intrinsic allows annotation of a pointer to an integer with arbitrary +strings. This can be useful for special purpose optimizations that want to look +for these annotations. These have no other defined use; they are ignored by code +generation and optimization. + '``llvm.annotation.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/NVPTXUsage.rst b/docs/NVPTXUsage.rst new file mode 100644 index 0000000000..5451619686 --- /dev/null +++ b/docs/NVPTXUsage.rst @@ -0,0 +1,276 @@ +============================= +User Guide for NVPTX Back-end +============================= + +.. contents:: + :local: + :depth: 3 + + +Introduction +============ + +To support GPU programming, the NVPTX back-end supports a subset of LLVM IR +along with a defined set of conventions used to represent GPU programming +concepts. This document provides an overview of the general usage of the back- +end, including a description of the conventions used and the set of accepted +LLVM IR. + +.. note:: + + This document assumes a basic familiarity with CUDA and the PTX + assembly language. Information about the CUDA Driver API and the PTX assembly + language can be found in the `CUDA documentation + <http://docs.nvidia.com/cuda/index.html>`_. + + + +Conventions +=========== + +Marking Functions as Kernels +---------------------------- + +In PTX, there are two types of functions: *device functions*, which are only +callable by device code, and *kernel functions*, which are callable by host +code. By default, the back-end will emit device functions. Metadata is used to +declare a function as a kernel function. This metadata is attached to the +``nvvm.annotations`` named metadata object, and has the following format: + +.. code-block:: llvm + + !0 = metadata !{<function-ref>, metadata !"kernel", i32 1} + +The first parameter is a reference to the kernel function. The following +example shows a kernel function calling a device function in LLVM IR. The +function ``@my_kernel`` is callable from host code, but ``@my_fmad`` is not. + +.. code-block:: llvm + + define float @my_fmad(float %x, float %y, float %z) { + %mul = fmul float %x, %y + %add = fadd float %mul, %z + ret float %add + } + + define void @my_kernel(float* %ptr) { + %val = load float* %ptr + %ret = call float @my_fmad(float %val, float %val, float %val) + store float %ret, float* %ptr + ret void + } + + !nvvm.annotations = !{!1} + !1 = metadata !{void (float*)* @my_kernel, metadata !"kernel", i32 1} + +When compiled, the PTX kernel functions are callable by host-side code. + + +Address Spaces +-------------- + +The NVPTX back-end uses the following address space mapping: + + ============= ====================== + Address Space Memory Space + ============= ====================== + 0 Generic + 1 Global + 2 Internal Use + 3 Shared + 4 Constant + 5 Local + ============= ====================== + +Every global variable and pointer type is assigned to one of these address +spaces, with 0 being the default address space. Intrinsics are provided which +can be used to convert pointers between the generic and non-generic address +spaces. + +As an example, the following IR will define an array ``@g`` that resides in +global device memory. + +.. code-block:: llvm + + @g = internal addrspace(1) global [4 x i32] [ i32 0, i32 1, i32 2, i32 3 ] + +LLVM IR functions can read and write to this array, and host-side code can +copy data to it by name with the CUDA Driver API. + +Note that since address space 0 is the generic space, it is illegal to have +global variables in address space 0. Address space 0 is the default address +space in LLVM, so the ``addrspace(N)`` annotation is *required* for global +variables. + + +NVPTX Intrinsics +================ + +Address Space Conversion +------------------------ + +'``llvm.nvvm.ptr.*.to.gen``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. You can use these on any pointer types. + +.. code-block:: llvm + + declare i8* @llvm.nvvm.ptr.global.to.gen.p0i8.p1i8(i8 addrspace(1)*) + declare i8* @llvm.nvvm.ptr.shared.to.gen.p0i8.p3i8(i8 addrspace(3)*) + declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) + declare i8* @llvm.nvvm.ptr.local.to.gen.p0i8.p5i8(i8 addrspace(5)*) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.*.to.gen``' intrinsics convert a pointer in a non-generic +address space to a generic address space pointer. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid generic address space +pointer. + + +'``llvm.nvvm.ptr.gen.to.*``' Intrinsics +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +These are overloaded intrinsics. You can use these on any pointer types. + +.. code-block:: llvm + + declare i8* @llvm.nvvm.ptr.gen.to.global.p1i8.p0i8(i8 addrspace(1)*) + declare i8* @llvm.nvvm.ptr.gen.to.shared.p3i8.p0i8(i8 addrspace(3)*) + declare i8* @llvm.nvvm.ptr.gen.to.constant.p4i8.p0i8(i8 addrspace(4)*) + declare i8* @llvm.nvvm.ptr.gen.to.local.p5i8.p0i8(i8 addrspace(5)*) + +Overview: +""""""""" + +The '``llvm.nvvm.ptr.gen.to.*``' intrinsics convert a pointer in the generic +address space to a pointer in the target address space. Note that these +intrinsics are only useful if the address space of the target address space of +the pointer is known. It is not legal to use address space conversion +intrinsics to convert a pointer from one non-generic address space to another +non-generic address space. + +Semantics: +"""""""""" + +These intrinsics modify the pointer value to be a valid pointer in the target +non-generic address space. + + +Reading PTX Special Registers +----------------------------- + +'``llvm.nvvm.read.ptx.sreg.*``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() + declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() + declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() + declare i32 @llvm.nvvm.read.ptx.sreg.warpsize() + +Overview: +""""""""" + +The '``@llvm.nvvm.read.ptx.sreg.*``' intrinsics provide access to the PTX +special registers, in particular the kernel launch bounds. These registers +map in the following way to CUDA builtins: + + ============ ===================================== + CUDA Builtin PTX Special Register Intrinsic + ============ ===================================== + ``threadId`` ``@llvm.nvvm.read.ptx.sreg.tid.*`` + ``blockIdx`` ``@llvm.nvvm.read.ptx.sreg.ctaid.*`` + ``blockDim`` ``@llvm.nvvm.read.ptx.sreg.ntid.*`` + ``gridDim`` ``@llvm.nvvm.read.ptx.sreg.nctaid.*`` + ============ ===================================== + + +Barriers +-------- + +'``llvm.nvvm.barrier0``' +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +.. code-block:: llvm + + declare void @llvm.nvvm.barrier0() + +Overview: +""""""""" + +The '``@llvm.nvvm.barrier0()``' intrinsic emits a PTX ``bar.sync 0`` +instruction, equivalent to the ``__syncthreads()`` call in CUDA. + + +Other Intrinsics +---------------- + +For the full set of NVPTX intrinsics, please see the +``include/llvm/IR/IntrinsicsNVVM.td`` file in the LLVM source tree. + + +Executing PTX +============= + +The most common way to execute PTX assembly on a GPU device is to use the CUDA +Driver API. This API is a low-level interface to the GPU driver and allows for +JIT compilation of PTX code to native GPU machine code. + +Initializing the Driver API: + +.. code-block:: c++ + + CUdevice device; + CUcontext context; + + // Initialize the driver API + cuInit(0); + // Get a handle to the first compute device + cuDeviceGet(&device, 0); + // Create a compute device context + cuCtxCreate(&context, 0, device); + +JIT compiling a PTX string to a device binary: + +.. code-block:: c++ + + CUmodule module; + CUfunction funcion; + + // JIT compile a null-terminated PTX string + cuModuleLoadData(&module, (void*)PTXString); + + // Get a handle to the "myfunction" kernel function + cuModuleGetFunction(&function, module, "myfunction"); + +For full examples of executing PTX assembly, please see the `CUDA Samples +<https://developer.nvidia.com/cuda-downloads>`_ distribution. diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 4fc4597933..7864165617 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -626,6 +626,33 @@ SmallVectors are most useful when on the stack. SmallVector also provides a nice portable and efficient replacement for ``alloca``. +.. note:: + + Prefer to use ``SmallVectorImpl<T>`` as a parameter type. + + In APIs that don't care about the "small size" (most?), prefer to use + the ``SmallVectorImpl<T>`` class, which is basically just the "vector + header" (and methods) without the elements allocated after it. Note that + ``SmallVector<T, N>`` inherits from ``SmallVectorImpl<T>`` so the + conversion is implicit and costs nothing. E.g. + + .. code-block:: c++ + + // BAD: Clients cannot pass e.g. SmallVector<Foo, 4>. + hardcodedSmallSize(SmallVector<Foo, 2> &Out); + // GOOD: Clients can pass any SmallVector<Foo, N>. + allowsAnySmallSize(SmallVectorImpl<Foo> &Out); + + void someFunc() { + SmallVector<Foo, 8> Vec; + hardcodedSmallSize(Vec); // Error. + allowsAnySmallSize(Vec); // Works. + } + + Even though it has "``Impl``" in the name, this is so widely used that + it really isn't "private to the implementation" anymore. A name like + ``SmallVectorHeader`` would be more appropriate. + .. _dss_vector: <vector> @@ -989,7 +1016,9 @@ coupled with a good choice of :ref:`sequential container <ds_sequential>`. This combination provides the several nice properties: the result data is contiguous in memory (good for cache locality), has few allocations, is easy to address (iterators in the final vector are just indices or pointers), and can be -efficiently queried with a standard binary or radix search. +efficiently queried with a standard binary search (e.g. +``std::lower_bound``; if you want the whole range of elements comparing +equal, use ``std::equal_range``). .. _dss_smallset: diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index 9383c5b3fa..3ca5560254 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -64,6 +64,12 @@ Non-comprehensive list of changes in this release attributes, which are useful for passing information to code generation. See :doc:`HowToUseAttributes` for more details. +* TableGen's syntax for instruction selection patterns has been simplified. + Instead of specifying types indirectly with register classes, you should now + specify types directly in the input patterns. See ``SparcInstrInfo.td`` for + examples of the new syntax. The old syntax using register classes still + works, but it will be removed in a future LLVM release. + * ... next change ... .. NOTE @@ -90,6 +96,13 @@ in fairly early stages, but we expect successful compilation when: Some additional functionality is also implemented, notably DWARF debugging, GNU-style thread local storage and inline assembly. +Hexagon Target +-------------- + +- Removed support for legacy hexagonv2 and hexagonv3 processor + architectures which are no longer in use. Currently supported + architectures are hexagonv4 and hexagonv5. + Loop Vectorizer --------------- diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst index 78ce4e0e53..857479508a 100644 --- a/docs/SourceLevelDebugging.rst +++ b/docs/SourceLevelDebugging.rst @@ -1811,11 +1811,11 @@ values, we can clarify the contents of the ``BUCKETS``, ``HASHES`` and | HEADER.header_data_len | uint32_t | HEADER_DATA | HeaderData |-------------------------| - | BUCKETS | uint32_t[bucket_count] // 32 bit hash indexes + | BUCKETS | uint32_t[n_buckets] // 32 bit hash indexes |-------------------------| - | HASHES | uint32_t[hashes_count] // 32 bit hash values + | HASHES | uint32_t[n_hashes] // 32 bit hash values |-------------------------| - | OFFSETS | uint32_t[hashes_count] // 32 bit offsets to hash value data + | OFFSETS | uint32_t[n_hashes] // 32 bit offsets to hash value data |-------------------------| | ALL HASH DATA | `-------------------------' @@ -2080,23 +2080,23 @@ array to be: HeaderData.atoms[0].form = DW_FORM_data4; This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is - encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have - multiple matching DIEs in a single file, which could come up with an inlined - function for instance. Future tables could include more information about the - DIE such as flags indicating if the DIE is a function, method, block, - or inlined. +encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have +multiple matching DIEs in a single file, which could come up with an inlined +function for instance. Future tables could include more information about the +DIE such as flags indicating if the DIE is a function, method, block, +or inlined. The KeyType for the DWARF table is a 32 bit string table offset into the - ".debug_str" table. The ".debug_str" is the string table for the DWARF which - may already contain copies of all of the strings. This helps make sure, with - help from the compiler, that we reuse the strings between all of the DWARF - sections and keeps the hash table size down. Another benefit to having the - compiler generate all strings as DW_FORM_strp in the debug info, is that - DWARF parsing can be made much faster. +".debug_str" table. The ".debug_str" is the string table for the DWARF which +may already contain copies of all of the strings. This helps make sure, with +help from the compiler, that we reuse the strings between all of the DWARF +sections and keeps the hash table size down. Another benefit to having the +compiler generate all strings as DW_FORM_strp in the debug info, is that +DWARF parsing can be made much faster. After a lookup is made, we get an offset into the hash data. The hash data - needs to be able to deal with 32 bit hash collisions, so the chunk of data - at the offset in the hash data consists of a triple: +needs to be able to deal with 32 bit hash collisions, so the chunk of data +at the offset in the hash data consists of a triple: .. code-block:: c @@ -2105,7 +2105,7 @@ After a lookup is made, we get an offset into the hash data. The hash data HashData[hash_data_count] If "str_offset" is zero, then the bucket contents are done. 99.9% of the - hash data chunks contain a single item (no 32 bit hash collision): +hash data chunks contain a single item (no 32 bit hash collision): .. code-block:: none diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst index c9e1efba03..bd28a9031d 100644 --- a/docs/TableGen/LangRef.rst +++ b/docs/TableGen/LangRef.rst @@ -286,7 +286,7 @@ given values. .. productionlist:: SimpleValue: "(" `DagArg` `DagArgList` ")" DagArgList: `DagArg` ("," `DagArg`)* - DagArg: `Value` [":" `TokVarName`] + DagArg: `Value` [":" `TokVarName`] | `TokVarName` The initial :token:`DagArg` is called the "operator" of the dag. diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst index 4d8c8ce307..79cedee764 100644 --- a/docs/TestingGuide.rst +++ b/docs/TestingGuide.rst @@ -224,16 +224,7 @@ Below is an example of legal RUN lines in a ``.ll`` file: ; RUN: diff %t1 %t2 As with a Unix shell, the RUN lines permit pipelines and I/O -redirection to be used. However, the usage is slightly different than -for Bash. In general, it's useful to read the code of other tests to figure out -what you can use in yours. The major differences are: - -- You can't do ``2>&1``. That will cause :program:`lit` to write to a file - named ``&1``. Usually this is done to get stderr to go through a pipe. You - can do that with ``|&`` so replace this idiom: - ``... 2>&1 | FileCheck`` with ``... |& FileCheck`` -- You can only redirect to a file, not to another descriptor and not - from a here document. +redirection to be used. There are some quoting rules that you must pay attention to when writing your RUN lines. In general nothing needs to be quoted. :program:`lit` won't @@ -243,7 +234,7 @@ everything enclosed as one value. In general, you should strive to keep your RUN lines as simple as possible, using them only to run tools that generate textual output you can then examine. -The recommended way to examine output to figure out if the test passes it using +The recommended way to examine output to figure out if the test passes is using the :doc:`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in RUN lines is deprecated - please do not send or commit patches that use it.]* diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst index 6d6c2a1070..a03a5e42c2 100644 --- a/docs/WritingAnLLVMBackend.rst +++ b/docs/WritingAnLLVMBackend.rst @@ -760,7 +760,7 @@ target description file (``IntRegs``). def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRrr:$addr))]>; + [(set i32:$dst, (load ADDRrr:$addr))]>; The fourth parameter is the input source, which uses the address operand ``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``: @@ -788,7 +788,7 @@ class is defined: def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRri:$addr))]>; + [(set i32:$dst, (load ADDRri:$addr))]>; Writing these definitions for so many similar instructions can involve a lot of cut and paste. In ``.td`` files, the ``multiclass`` directive enables the @@ -803,11 +803,11 @@ pattern ``F3_12`` is defined to create 2 instruction classes each time def rr : F3_1 <2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set i32:$dst, (OpNode i32:$b, i32:$c))]>; def ri : F3_2 <2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>; + [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>; } So when the ``defm`` directive is used for the ``XOR`` and ``ADD`` @@ -856,7 +856,7 @@ format instruction having three operands. def XNORrr : F3_1<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "xnor $b, $c, $dst", - [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>; + [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>; The instruction templates in ``SparcInstrFormats.td`` show the base class for ``F3_1`` is ``InstSP``. @@ -1124,7 +1124,7 @@ a pattern with the store DAG operator. .. code-block:: llvm def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), - "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]>; + "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>; ``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``: @@ -1185,7 +1185,7 @@ instruction. SDValue CPTmp0; SDValue CPTmp1; - // Pattern: (st:void IntRegs:i32:$src, + // Pattern: (st:void i32:i32:$src, // ADDRrr:i32:$addr)<<P:Predicate_store>> // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src) // Pattern complexity = 13 cost = 1 size = 0 diff --git a/docs/index.rst b/docs/index.rst index 8f22ef2a77..c3bb8089da 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -22,7 +22,6 @@ Several introductory papers and presentations. :hidden: LangRef - GetElementPtr :doc:`LangRef` Defines the LLVM intermediate representation. @@ -48,10 +47,6 @@ Several introductory papers and presentations. .. __: http://llvm.org/pubs/2002-12-LattnerMSThesis.html -:doc:`GetElementPtr` - Answers to some very frequent questions about LLVM's most frequently - misunderstood instruction. - `Publications mentioning LLVM <http://llvm.org/pubs>`_ .. @@ -72,7 +67,6 @@ representation. CMake HowToBuildOnARM CommandGuide/index - DeveloperPolicy GettingStarted GettingStartedVS FAQ @@ -87,6 +81,7 @@ representation. ReleaseNotes Passes YamlIO + GetElementPtr :doc:`GettingStarted` Discusses how to get up and running quickly with the LLVM infrastructure. @@ -108,9 +103,6 @@ representation. Tutorials about using LLVM. Includes a tutorial about making a custom language with LLVM. -:doc:`DeveloperPolicy` - The LLVM project's policy towards developers and their contributions. - :doc:`LLVM Command Guide <CommandGuide/index>` A reference manual for the LLVM command line utilities ("man" pages for LLVM tools). @@ -149,25 +141,9 @@ representation. :doc:`YamlIO` A reference guide for using LLVM's YAML I/O library. -IRC -=== - -Users and developers of the LLVM project (including subprojects such as Clang) -can be found in #llvm on `irc.oftc.net <irc://irc.oftc.net/llvm>`_. - -This channel has several bots. - -* Buildbot reporters - - * llvmbb - Bot for the main LLVM buildbot master. - http://lab.llvm.org:8011/console - * bb-chapuni - An individually run buildbot master. http://bb.pgr.jp/console - * smooshlab - Apple's internal buildbot master. - -* robot - Bugzilla linker. %bug <number> - -* clang-bot - A `geordi <http://www.eelis.net/geordi/>`_ instance running - near-trunk clang instead of gcc. +:doc:`GetElementPtr` + Answers to some very frequent questions about LLVM's most frequently + misunderstood instruction. Programming Documentation ========================= @@ -248,6 +224,7 @@ For API clients and LLVM developers. WritingAnLLVMPass TableGen/LangRef HowToUseAttributes + NVPTXUsage :doc:`WritingAnLLVMPass` Information on how to write LLVM transformations and analyses. @@ -316,6 +293,10 @@ For API clients and LLVM developers. :doc:`HowToUseAttributes` Answers some questions about the new Attributes infrastructure. +:doc:`NVPTXUsage` + This document describes using the NVPTX back-end to compile GPU kernels. + + Development Process Documentation ================================= @@ -324,12 +305,16 @@ Information about LLVM's development process. .. toctree:: :hidden: + DeveloperPolicy MakefileGuide Projects LLVMBuild HowToReleaseLLVM Packaging +:doc:`DeveloperPolicy` + The LLVM project's policy towards developers and their contributions. + :doc:`Projects` How-to guide and templates for new projects that *use* the LLVM infrastructure. The templates (directory organization, Makefiles, and test @@ -349,46 +334,75 @@ Information about LLVM's development process. :doc:`Packaging` Advice on packaging LLVM into a distribution. +Community +========= + +LLVM has a thriving community of friendly and helpful developers. +The two primary communication mechanisms in the LLVM community are mailing +lists and IRC. + Mailing Lists -============= +------------- If you can't find what you need in these docs, try consulting the mailing lists. -`LLVM Announcements List`__ - This is a low volume list that provides important announcements regarding - LLVM. It gets email about once a month. - - .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce - -`Developer's List`__ +`Developer's List (llvmdev)`__ This list is for people who want to be included in technical discussions of LLVM. People post to this list when they have questions about writing code for or using the LLVM tools. It is relatively low volume. .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev -`Bugs & Patches Archive`__ - This list gets emailed every time a bug is opened and closed, and when people - submit patches to be included in LLVM. It is higher volume than the LLVMdev - list. - - .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/ - -`Commits Archive`__ +`Commits Archive (llvm-commits)`__ This list contains all commit messages that are made when LLVM developers - commit code changes to the repository. It is useful for those who want to - stay on the bleeding edge of LLVM development. This list is very high volume. + commit code changes to the repository. It also serves as a forum for + patch review (i.e. send patches here). It is useful for those who want to + stay on the bleeding edge of LLVM development. This list is very high + volume. .. __: http://lists.cs.uiuc.edu/pipermail/llvm-commits/ -`Test Results Archive`__ +`Bugs & Patches Archive (llvmbugs)`__ + This list gets emailed every time a bug is opened and closed. It is + higher volume than the LLVMdev list. + + .. __: http://lists.cs.uiuc.edu/pipermail/llvmbugs/ + +`Test Results Archive (llvm-testresults)`__ A message is automatically sent to this list by every active nightly tester when it completes. As such, this list gets email several times each day, making it a high volume list. .. __: http://lists.cs.uiuc.edu/pipermail/llvm-testresults/ +`LLVM Announcements List (llvm-announce)`__ + This is a low volume list that provides important announcements regarding + LLVM. It gets email about once a month. + + .. __: http://lists.cs.uiuc.edu/mailman/listinfo/llvm-announce + +IRC +--- + +Users and developers of the LLVM project (including subprojects such as Clang) +can be found in #llvm on `irc.oftc.net <irc://irc.oftc.net/llvm>`_. + +This channel has several bots. + +* Buildbot reporters + + * llvmbb - Bot for the main LLVM buildbot master. + http://lab.llvm.org:8011/console + * bb-chapuni - An individually run buildbot master. http://bb.pgr.jp/console + * smooshlab - Apple's internal buildbot master. + +* robot - Bugzilla linker. %bug <number> + +* clang-bot - A `geordi <http://www.eelis.net/geordi/>`_ instance running + near-trunk clang instead of gcc. + + Indices and tables ================== diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h index 42e123930d..11be4c513e 100644 --- a/include/llvm/ADT/APSInt.h +++ b/include/llvm/ADT/APSInt.h @@ -161,11 +161,11 @@ public: } APSInt& operator++() { - static_cast<APInt&>(*this)++; + ++(static_cast<APInt&>(*this)); return *this; } APSInt& operator--() { - static_cast<APInt&>(*this)--; + --(static_cast<APInt&>(*this)); return *this; } APSInt operator++(int) { diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index 9e5ab021a5..b8b8861995 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -226,13 +226,13 @@ namespace llvm { template<class T> struct simplify_type<IntrusiveRefCntPtr<T> > { typedef T* SimpleType; - static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) { + static SimpleType getSimplifiedValue(IntrusiveRefCntPtr<T>& Val) { return Val.getPtr(); } }; template<class T> struct simplify_type<const IntrusiveRefCntPtr<T> > { - typedef T* SimpleType; + typedef /*const*/ T* SimpleType; static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) { return Val.getPtr(); } diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index 81d73ed8b9..194e53fac2 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -128,20 +128,6 @@ public: #endif }; -template<typename T> struct simplify_type; - -template <typename T> -struct simplify_type<const Optional<T> > { - typedef const T* SimpleType; - static SimpleType getSimplifiedValue(const Optional<T> &Val) { - return Val.getPointer(); - } -}; - -template <typename T> -struct simplify_type<Optional<T> > - : public simplify_type<const Optional<T> > {}; - template <typename T> struct isPodLike; template <typename T> struct isPodLike<Optional<T> > { // An Optional<T> is pod-like if T is. diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index 7f6350e444..59fa3f39c9 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -260,7 +260,7 @@ class ReversePostOrderTraversal { typedef typename GT::NodeType NodeType; std::vector<NodeType*> Blocks; // Block list in normal PO order inline void Initialize(NodeType *BB) { - copy(po_begin(BB), po_end(BB), back_inserter(Blocks)); + std::copy(po_begin(BB), po_end(BB), std::back_inserter(Blocks)); } public: typedef typename std::vector<NodeType*>::reverse_iterator rpo_iterator; diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index 62620fa267..652492a153 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -178,9 +178,9 @@ public: unsigned count() const { if (isSmall()) { uintptr_t Bits = getSmallBits(); - if (sizeof(uintptr_t) * CHAR_BIT == 32) + if (NumBaseBits == 32) return CountPopulation_32(Bits); - if (sizeof(uintptr_t) * CHAR_BIT == 64) + if (NumBaseBits == 64) return CountPopulation_64(Bits); llvm_unreachable("Unsupported!"); } @@ -215,9 +215,9 @@ public: uintptr_t Bits = getSmallBits(); if (Bits == 0) return -1; - if (sizeof(uintptr_t) * CHAR_BIT == 32) + if (NumBaseBits == 32) return CountTrailingZeros_32(Bits); - if (sizeof(uintptr_t) * CHAR_BIT == 64) + if (NumBaseBits == 64) return CountTrailingZeros_64(Bits); llvm_unreachable("Unsupported!"); } @@ -233,9 +233,9 @@ public: Bits &= ~uintptr_t(0) << (Prev + 1); if (Bits == 0 || Prev + 1 >= getSmallSize()) return -1; - if (sizeof(uintptr_t) * CHAR_BIT == 32) + if (NumBaseBits == 32) return CountTrailingZeros_32(Bits); - if (sizeof(uintptr_t) * CHAR_BIT == 64) + if (NumBaseBits == 64) return CountTrailingZeros_64(Bits); llvm_unreachable("Unsupported!"); } diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 3bb883088c..8c7304197f 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -54,8 +54,6 @@ protected: /// then the set is in 'small mode'. const void **CurArray; /// CurArraySize - The allocated size of CurArray, always a power of two. - /// Note that CurArray points to an array that has CurArraySize+1 elements in - /// it, so that the end iterator actually points to valid memory. unsigned CurArraySize; // If small, this is # elts allocated consecutively @@ -68,9 +66,6 @@ protected: SmallArray(SmallStorage), CurArray(SmallStorage), CurArraySize(SmallSize) { assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 && "Initial size must be a power of two!"); - // The end pointer, always valid, is set to a valid element to help the - // iterator. - CurArray[SmallSize] = 0; clear(); } ~SmallPtrSetImpl(); @@ -147,9 +142,11 @@ protected: class SmallPtrSetIteratorImpl { protected: const void *const *Bucket; + const void *const *End; public: - explicit SmallPtrSetIteratorImpl(const void *const *BP) : Bucket(BP) { - AdvanceIfNotValid(); + explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E) + : Bucket(BP), End(E) { + AdvanceIfNotValid(); } bool operator==(const SmallPtrSetIteratorImpl &RHS) const { @@ -164,8 +161,10 @@ protected: /// that is. This is guaranteed to stop because the end() bucket is marked /// valid. void AdvanceIfNotValid() { - while (*Bucket == SmallPtrSetImpl::getEmptyMarker() || - *Bucket == SmallPtrSetImpl::getTombstoneMarker()) + assert(Bucket <= End); + while (Bucket != End && + (*Bucket == SmallPtrSetImpl::getEmptyMarker() || + *Bucket == SmallPtrSetImpl::getTombstoneMarker())) ++Bucket; } }; @@ -182,12 +181,13 @@ public: typedef std::ptrdiff_t difference_type; typedef std::forward_iterator_tag iterator_category; - explicit SmallPtrSetIterator(const void *const *BP) - : SmallPtrSetIteratorImpl(BP) {} + explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) + : SmallPtrSetIteratorImpl(BP, E) {} // Most methods provided by baseclass. const PtrTy operator*() const { + assert(Bucket < End); return PtrTraits::getFromVoidPointer(const_cast<void*>(*Bucket)); } @@ -236,9 +236,8 @@ template<class PtrType, unsigned SmallSize> class SmallPtrSet : public SmallPtrSetImpl { // Make sure that SmallSize is a power of two, round up if not. enum { SmallSizePowTwo = RoundUpToPowerOfTwo<SmallSize>::Val }; - /// SmallStorage - Fixed size storage used in 'small mode'. The extra element - /// ensures that the end iterator actually points to valid memory. - const void *SmallStorage[SmallSizePowTwo+1]; + /// SmallStorage - Fixed size storage used in 'small mode'. + const void *SmallStorage[SmallSizePowTwo]; typedef PointerLikeTypeTraits<PtrType> PtrTraits; public: SmallPtrSet() : SmallPtrSetImpl(SmallStorage, SmallSizePowTwo) {} @@ -275,10 +274,10 @@ public: typedef SmallPtrSetIterator<PtrType> iterator; typedef SmallPtrSetIterator<PtrType> const_iterator; inline iterator begin() const { - return iterator(CurArray); + return iterator(CurArray, CurArray+CurArraySize); } inline iterator end() const { - return iterator(CurArray+CurArraySize); + return iterator(CurArray+CurArraySize, CurArray+CurArraySize); } // Allow assignment from any smallptrset with the same element type even if it diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index 9167f87218..7ba0a714bf 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -16,6 +16,7 @@ #include "llvm/Support/AlignOf.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/type_traits.h" #include <algorithm> #include <cassert> @@ -267,7 +268,8 @@ template <typename T, bool isPodLike> void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) { size_t CurCapacity = this->capacity(); size_t CurSize = this->size(); - size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero. + // Always grow, even from zero. + size_t NewCapacity = size_t(NextPowerOf2(CurCapacity+2)); if (NewCapacity < MinSize) NewCapacity = MinSize; T *NewElts = static_cast<T*>(malloc(NewCapacity*sizeof(T))); diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index b69a964a23..7bea577f34 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -18,23 +18,25 @@ namespace llvm { - /// StringSet - A wrapper for StringMap that provides set-like - /// functionality. Only insert() and count() methods are used by my - /// code. + /// StringSet - A wrapper for StringMap that provides set-like functionality. template <class AllocatorTy = llvm::MallocAllocator> class StringSet : public llvm::StringMap<char, AllocatorTy> { typedef llvm::StringMap<char, AllocatorTy> base; public: - bool insert(StringRef InLang) { - assert(!InLang.empty()); - const char *KeyStart = InLang.data(); - const char *KeyEnd = KeyStart + InLang.size(); - llvm::StringMapEntry<char> *Entry = llvm::StringMapEntry<char>:: - Create(KeyStart, KeyEnd, base::getAllocator(), '+'); - if (!base::insert(Entry)) { - Entry->Destroy(base::getAllocator()); + + /// insert - Insert the specified key into the set. If the key already + /// exists in the set, return false and ignore the request, otherwise insert + /// it and return true. + bool insert(StringRef Key) { + // Get or create the map entry for the key; if it doesn't exist the value + // type will be default constructed which we use to detect insert. + // + // We use '+' as the sentinel value in the map. + assert(!Key.empty()); + StringMapEntry<char> &Entry = this->GetOrCreateValue(Key); + if (Entry.getValue() == '+') return false; - } + Entry.setValue('+'); return true; } }; diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h index aeb78484c6..71dab2ef55 100644 --- a/include/llvm/ADT/ilist.h +++ b/include/llvm/ADT/ilist.h @@ -274,12 +274,12 @@ template<typename From> struct simplify_type; template<typename NodeTy> struct simplify_type<ilist_iterator<NodeTy> > { typedef NodeTy* SimpleType; - static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) { + static SimpleType getSimplifiedValue(ilist_iterator<NodeTy> &Node) { return &*Node; } }; template<typename NodeTy> struct simplify_type<const ilist_iterator<NodeTy> > { - typedef NodeTy* SimpleType; + typedef /*const*/ NodeTy* SimpleType; static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) { return &*Node; diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 163331c262..da007072e5 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -20,7 +20,6 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" -#include "llvm/Support/CallSite.h" #include "llvm/Support/ValueHandle.h" #include <vector> diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h index f220c58244..b3e2d18eb2 100644 --- a/include/llvm/Analysis/BlockFrequencyImpl.h +++ b/include/llvm/Analysis/BlockFrequencyImpl.h @@ -271,7 +271,7 @@ class BlockFrequencyImpl { BlockT *EntryBlock = fn->begin(); - copy(po_begin(EntryBlock), po_end(EntryBlock), back_inserter(POT)); + std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT)); unsigned RPOidx = 0; for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) { diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index 42fd3927c8..306549fba4 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -338,6 +338,10 @@ namespace llvm { /// getMax - Get the max backedge taken count for the loop. const SCEV *getMax(ScalarEvolution *SE) const; + /// Return true if any backedge taken count expressions refer to the given + /// subexpression. + bool hasOperand(const SCEV *S, ScalarEvolution *SE) const; + /// clear - Invalidate this result and free associated memory. void clear(); }; diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index 2d2976cde1..f3139739cd 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -126,7 +126,7 @@ public: } }; - + /// BitstreamEntry - When advancing through a bitstream cursor, each advance can /// discover a few different kinds of entries: /// Error - Malformed bitcode was found. @@ -142,7 +142,7 @@ struct BitstreamEntry { SubBlock, Record } Kind; - + unsigned ID; static BitstreamEntry getError() { @@ -170,7 +170,7 @@ class BitstreamCursor { BitstreamReader *BitStream; size_t NextChar; - + /// CurWord/word_t - This is the current data we have pulled from the stream /// but have not returned to the client. This is specifically and /// intentionally defined to follow the word size of the host machine for @@ -199,7 +199,7 @@ class BitstreamCursor { /// BlockScope - This tracks the codesize of parent blocks. SmallVector<Block, 8> BlockScope; - + public: BitstreamCursor() : BitStream(0), NextChar(0) { } @@ -231,7 +231,7 @@ public: void operator=(const BitstreamCursor &RHS); void freeState(); - + bool isEndPos(size_t pos) { return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos)); } @@ -278,7 +278,7 @@ public: /// returned just like normal records. AF_DontAutoprocessAbbrevs = 2 }; - + /// advance - Advance the current bitstream, returning the next entry in the /// stream. BitstreamEntry advance(unsigned Flags = 0) { @@ -290,10 +290,10 @@ public: return BitstreamEntry::getError(); return BitstreamEntry::getEndBlock(); } - + if (Code == bitc::ENTER_SUBBLOCK) return BitstreamEntry::getSubBlock(ReadSubBlockID()); - + if (Code == bitc::DEFINE_ABBREV && !(Flags & AF_DontAutoprocessAbbrevs)) { // We read and accumulate abbrev's, the client can't do anything with @@ -314,7 +314,7 @@ public: BitstreamEntry Entry = advance(Flags); if (Entry.Kind != BitstreamEntry::SubBlock) return Entry; - + // If we found a sub-block, just skip over it and check the next entry. if (SkipBlock()) return BitstreamEntry::getError(); @@ -345,7 +345,7 @@ public: uint32_t Read(unsigned NumBits) { assert(NumBits && NumBits <= 32 && "Cannot return zero or more than 32 bits!"); - + // If the field is fully contained by CurWord, return it quickly. if (BitsInCurWord >= NumBits) { uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); @@ -365,15 +365,15 @@ public: // Read the next word from the stream. uint8_t Array[sizeof(word_t)] = {0}; - + BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array, NULL); - + // Handle big-endian byte-swapping if necessary. support::detail::packed_endian_specific_integral <word_t, support::little, support::unaligned> EndianValue; memcpy(&EndianValue, Array, sizeof(Array)); - + CurWord = EndianValue; NextChar += sizeof(word_t); @@ -450,7 +450,7 @@ private: BitsInCurWord = 32; return; } - + BitsInCurWord = 0; CurWord = 0; } @@ -493,7 +493,7 @@ public: /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter /// the block, and return true if the block has an error. bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0); - + bool ReadBlockEnd() { if (BlockScope.empty()) return true; @@ -529,7 +529,7 @@ private: void readAbbreviatedField(const BitCodeAbbrevOp &Op, SmallVectorImpl<uint64_t> &Vals); void skipAbbreviatedField(const BitCodeAbbrevOp &Op); - + public: /// getAbbrev - Return the abbreviation for the specified AbbrevId. @@ -541,7 +541,7 @@ public: /// skipRecord - Read the current record and discard it. void skipRecord(unsigned AbbrevID); - + unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, StringRef *Blob = 0); @@ -549,7 +549,7 @@ public: // Abbrev Processing //===--------------------------------------------------------------------===// void ReadAbbrevRecord(); - + bool ReadBlockInfoBlock(); }; diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 7b68f8761a..a837211875 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -458,7 +458,7 @@ public: template<typename uintty> void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, const char *ArrayData, unsigned ArrayLen) { - return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, + return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, ArrayLen)); } diff --git a/include/llvm/CodeGen/GCs.h b/include/llvm/CodeGen/GCs.h index c407b61674..456d2dcb51 100644 --- a/include/llvm/CodeGen/GCs.h +++ b/include/llvm/CodeGen/GCs.h @@ -26,6 +26,12 @@ namespace llvm { /// Creates an ocaml-compatible metadata printer. void linkOcamlGCPrinter(); + + /// Creates an erlang-compatible garbage collector. + void linkErlangGC(); + + /// Creates an erlang-compatible metadata printer. + void linkErlangGCPrinter(); /// Creates a shadow stack garbage collector. This collector requires no code /// generator support. diff --git a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h index 7d1b1fe477..c3046da90b 100644 --- a/include/llvm/CodeGen/LinkAllAsmWriterComponents.h +++ b/include/llvm/CodeGen/LinkAllAsmWriterComponents.h @@ -29,6 +29,7 @@ namespace { return; llvm::linkOcamlGCPrinter(); + llvm::linkErlangGCPrinter(); } } ForceAsmWriterLinking; // Force link by creating a global definition. diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h index 2145eb80bd..916c0f233e 100644 --- a/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -37,6 +37,7 @@ namespace { (void) llvm::createDefaultPBQPRegisterAllocator(); llvm::linkOcamlGC(); + llvm::linkErlangGC(); llvm::linkShadowStackGC(); (void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default); diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 4632368157..7d72f37255 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -20,9 +20,7 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -36,6 +34,7 @@ namespace llvm { class AliasAnalysis; + class BitVector; class LiveRangeCalc; class LiveVariables; class MachineDominatorTree; diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index def7b00ce7..8a32a3c11a 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -83,7 +83,7 @@ private: /// allUsesAvailableAt - Return true if all registers used by OrigMI at /// OrigIdx are also available with the same value at UseIdx. bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx); + SlotIndex UseIdx) const; /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index 3c5ed9297e..6628fd278e 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -29,21 +29,19 @@ #ifndef LLVM_CODEGEN_LIVEVARIABLES_H #define LLVM_CODEGEN_LIVEVARIABLES_H -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { +class MachineBasicBlock; class MachineRegisterInfo; -class TargetRegisterInfo; class LiveVariables : public MachineFunctionPass { public: diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h index 1c9bdd954c..98dd03b45c 100644 --- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h +++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h @@ -1,4 +1,3 @@ - //==- MachineBranchProbabilityInfo.h - Machine Branch Probability Analysis -==// // // The LLVM Compiler Infrastructure diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 93d77287d7..cdec7e6637 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -419,6 +419,9 @@ public: /// void setStackSize(uint64_t Size) { StackSize = Size; } + /// Estimate and return the size of the stack frame. + unsigned estimateStackSize(const MachineFunction &MF) const; + /// getOffsetAdjustment - Return the correction for frame offsets. /// int getOffsetAdjustment() const { return OffsetAdjustment; } diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 1db6252ef4..a3acec8095 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -295,7 +295,7 @@ public: /// isUsedFunction - Return true if the functions in the llvm.used list. This /// does not return true for things in llvm.compiler.used unless they are also /// in llvm.used. - bool isUsedFunction(const Function *F) { + bool isUsedFunction(const Function *F) const { return UsedFunctions.count(F); } diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 4d559b5799..fc8aa75ddf 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -444,10 +444,6 @@ namespace llvm { /// information. extern char &MachineBlockPlacementStatsID; - /// Code Placement - This pass optimize code placement and aligns loop - /// headers to target specific alignment boundary. - extern char &CodePlacementOptID; - /// GCLowering Pass - Performs target-independent LLVM IR transformations for /// highly portable strategies. /// diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 01199205b5..49d16892f8 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -40,21 +40,23 @@ class RegScavenger { /// registers. bool Tracking; - /// ScavengingFrameIndex - Special spill slot used for scavenging a register - /// post register allocation. - int ScavengingFrameIndex; + /// Information on scavenged registers (held in a spill slot). + struct ScavengedInfo { + ScavengedInfo(int FI = -1) : FrameIndex(FI), Reg(0), Restore(NULL) {} - /// ScavengedReg - If none zero, the specific register is currently being - /// scavenged. That is, it is spilled to the special scavenging stack slot. - unsigned ScavengedReg; + /// A spill slot used for scavenging a register post register allocation. + int FrameIndex; - /// ScavengedRC - Register class of the scavenged register. - /// - const TargetRegisterClass *ScavengedRC; + /// If non-zero, the specific register is currently being + /// scavenged. That is, it is spilled to this scavenging stack slot. + unsigned Reg; + + /// The instruction that restores the scavenged register from stack. + const MachineInstr *Restore; + }; - /// ScavengeRestore - Instruction that restores the scavenged register from - /// stack. - const MachineInstr *ScavengeRestore; + /// A vector of information on scavenged registers. + SmallVector<ScavengedInfo, 2> Scavenged; /// CalleeSavedrRegs - A bitvector of callee saved registers for the target. /// @@ -71,8 +73,7 @@ class RegScavenger { public: RegScavenger() - : MBB(NULL), NumPhysRegs(0), Tracking(false), - ScavengingFrameIndex(-1), ScavengedReg(0), ScavengedRC(NULL) {} + : MBB(NULL), NumPhysRegs(0), Tracking(false) {} /// enterBasicBlock - Start tracking liveness from the begin of the specific /// basic block. @@ -93,8 +94,11 @@ public: } /// skipTo - Move the internal MBB iterator but do not update register states. - /// - void skipTo(MachineBasicBlock::iterator I) { MBBI = I; } + void skipTo(MachineBasicBlock::iterator I) { + if (I == MachineBasicBlock::iterator(NULL)) + Tracking = false; + MBBI = I; + } /// getRegsUsed - return all registers currently in use in used. void getRegsUsed(BitVector &used, bool includeReserved); @@ -107,10 +111,28 @@ public: /// Return 0 if none is found. unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const; - /// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter of - /// ScavengingFrameIndex. - void setScavengingFrameIndex(int FI) { ScavengingFrameIndex = FI; } - int getScavengingFrameIndex() const { return ScavengingFrameIndex; } + /// Add a scavenging frame index. + void addScavengingFrameIndex(int FI) { + Scavenged.push_back(ScavengedInfo(FI)); + } + + /// Query whether a frame index is a scavenging frame index. + bool isScavengingFrameIndex(int FI) const { + for (SmallVector<ScavengedInfo, 2>::const_iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) + if (I->FrameIndex == FI) + return true; + + return false; + } + + /// Get an array of scavenging frame indices. + void getScavengingFrameIndices(SmallVectorImpl<int> &A) const { + for (SmallVector<ScavengedInfo, 2>::const_iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) + if (I->FrameIndex >= 0) + A.push_back(I->FrameIndex); + } /// scavengeRegister - Make a register of the specific register class /// available and do the appropriate bookkeeping. SPAdj is the stack diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 05f3b1494f..fef567f56b 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -196,14 +196,14 @@ template <> struct isPodLike<SDValue> { static const bool value = true; }; /// SDValues as if they were SDNode*'s. template<> struct simplify_type<SDValue> { typedef SDNode* SimpleType; - static SimpleType getSimplifiedValue(const SDValue &Val) { - return static_cast<SimpleType>(Val.getNode()); + static SimpleType getSimplifiedValue(SDValue &Val) { + return Val.getNode(); } }; template<> struct simplify_type<const SDValue> { - typedef SDNode* SimpleType; + typedef /*const*/ SDNode* SimpleType; static SimpleType getSimplifiedValue(const SDValue &Val) { - return static_cast<SimpleType>(Val.getNode()); + return Val.getNode(); } }; @@ -295,14 +295,8 @@ private: /// SDValues as if they were SDNode*'s. template<> struct simplify_type<SDUse> { typedef SDNode* SimpleType; - static SimpleType getSimplifiedValue(const SDUse &Val) { - return static_cast<SimpleType>(Val.getNode()); - } -}; -template<> struct simplify_type<const SDUse> { - typedef SDNode* SimpleType; - static SimpleType getSimplifiedValue(const SDUse &Val) { - return static_cast<SimpleType>(Val.getNode()); + static SimpleType getSimplifiedValue(SDUse &Val) { + return Val.getNode(); } }; diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 2f9e6ffe23..5a3d02c553 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -78,6 +78,14 @@ /* Define to 1 if you have the <ctype.h> header file. */ #undef HAVE_CTYPE_H +/* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you + don't. */ +#undef HAVE_DECL_FE_ALL_EXCEPT + +/* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you + don't. */ +#undef HAVE_DECL_FE_INEXACT + /* Define to 1 if you have the declaration of `strerror_s', and to 0 if you don't. */ #undef HAVE_DECL_STRERROR_S diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h index d59847558a..4f0aa07130 100644 --- a/include/llvm/DIBuilder.h +++ b/include/llvm/DIBuilder.h @@ -265,12 +265,13 @@ namespace llvm { /// DW_AT_containing_type. See DWARF documentation /// for more info. /// @param TemplateParms Template type parameters. - DIType createClassType(DIDescriptor Scope, StringRef Name, DIFile File, - unsigned LineNumber, uint64_t SizeInBits, - uint64_t AlignInBits, uint64_t OffsetInBits, - unsigned Flags, DIType DerivedFrom, - DIArray Elements, MDNode *VTableHolder = 0, - MDNode *TemplateParms = 0); + DICompositeType createClassType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType DerivedFrom, DIArray Elements, + MDNode *VTableHolder = 0, + MDNode *TemplateParms = 0); /// createStructType - Create debugging information entry for a struct. /// @param Scope Scope in which this struct is defined. @@ -375,10 +376,6 @@ namespace llvm { /// flag set. DIType createObjectPointerType(DIType Ty); - /// createTemporaryType - Create a temporary forward-declared type. - DIType createTemporaryType(); - DIType createTemporaryType(DIFile F); - /// createForwardDecl - Create a temporary forward-declared type. DIType createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line, unsigned RuntimeLang = 0, @@ -411,6 +408,19 @@ namespace llvm { createGlobalVariable(StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, llvm::Value *Val); + /// \brief Create a new descriptor for the specified global. + /// @param Name Name of the variable. + /// @param LinkageName Mangled variable name. + /// @param File File where this variable is defined. + /// @param LineNo Line number. + /// @param Ty Variable Type. + /// @param isLocalToUnit Boolean flag indicate whether this variable is + /// externally visible or not. + /// @param Val llvm::Value of the variable. + DIGlobalVariable + createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile File, + unsigned LineNo, DIType Ty, bool isLocalToUnit, + llvm::Value *Val); /// createStaticVariable - Create a new descriptor for the specified /// variable. diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h index 42d94f4777..15f91870a5 100644 --- a/include/llvm/DebugInfo.h +++ b/include/llvm/DebugInfo.h @@ -169,6 +169,18 @@ namespace llvm { StringRef getDirectory() const; }; + /// DIFile - This is a wrapper for a file. + class DIFile : public DIScope { + friend class DIDescriptor; + public: + explicit DIFile(const MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isFile()) + DbgNode = 0; + } + MDNode *getFileNode() const; + bool Verify() const; + }; + /// DICompileUnit - A wrapper for a compile unit. class DICompileUnit : public DIScope { friend class DIDescriptor; @@ -176,50 +188,24 @@ namespace llvm { public: explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {} - unsigned getLanguage() const { return getUnsignedField(2); } - StringRef getFilename() const { return getStringField(3); } - StringRef getDirectory() const { return getStringField(4); } - StringRef getProducer() const { return getStringField(5); } - - /// isMain - Each input file is encoded as a separate compile unit in LLVM - /// debugging information output. However, many target specific tool chains - /// prefer to encode only one compile unit in an object file. In this - /// situation, the LLVM code generator will include debugging information - /// entities in the compile unit that is marked as main compile unit. The - /// code generator accepts maximum one main compile unit per module. If a - /// module does not contain any main compile unit then the code generator - /// will emit multiple compile units in the output object file. - // TODO: This can be removed when we remove the legacy debug information. - bool isMain() const { return getUnsignedField(6) != 0; } - bool isOptimized() const { return getUnsignedField(7) != 0; } - StringRef getFlags() const { return getStringField(8); } - unsigned getRunTimeVersion() const { return getUnsignedField(9); } + unsigned getLanguage() const { return getUnsignedField(2); } + StringRef getProducer() const { return getStringField(3); } + + bool isOptimized() const { return getUnsignedField(4) != 0; } + StringRef getFlags() const { return getStringField(5); } + unsigned getRunTimeVersion() const { return getUnsignedField(6); } DIArray getEnumTypes() const; DIArray getRetainedTypes() const; DIArray getSubprograms() const; DIArray getGlobalVariables() const; - StringRef getSplitDebugFilename() const { return getStringField(14); } + StringRef getSplitDebugFilename() const { return getStringField(11); } /// Verify - Verify that a compile unit is well formed. bool Verify() const; }; - /// DIFile - This is a wrapper for a file. - class DIFile : public DIScope { - friend class DIDescriptor; - void printInternal(raw_ostream &OS) const {} // FIXME: Output something? - public: - explicit DIFile(const MDNode *N = 0) : DIScope(N) { - if (DbgNode && !isFile()) - DbgNode = 0; - } - StringRef getFilename() const { return getStringField(1); } - StringRef getDirectory() const { return getStringField(2); } - bool Verify() const; - }; - /// DIEnumerator - A wrapper for an enumerator (e.g. X and Y in 'enum {X,Y}'). /// FIXME: it seems strange that this doesn't have either a reference to the /// type/precision or a file/line pair for location info. @@ -250,9 +236,8 @@ namespace llvm { explicit DIType(const MDNode *N); explicit DIType() {} - DIScope getContext() const { return getFieldAs<DIScope>(1); } - StringRef getName() const { return getStringField(2); } - DIFile getFile() const { return getFieldAs<DIFile>(3); } + DIScope getContext() const { return getFieldAs<DIScope>(2); } + StringRef getName() const { return getStringField(3); } unsigned getLineNumber() const { return getUnsignedField(4); } uint64_t getSizeInBits() const { return getUInt64Field(5); } uint64_t getAlignInBits() const { return getUInt64Field(6); } @@ -297,12 +282,6 @@ namespace llvm { bool isValid() const { return DbgNode && (isBasicType() || isDerivedType() || isCompositeType()); } - StringRef getDirectory() const { - return getFieldAs<DIFile>(3).getDirectory(); - } - StringRef getFilename() const { - return getFieldAs<DIFile>(3).getFilename(); - } /// isUnsignedDIType - Return true if type encoding is unsigned. bool isUnsignedDIType(); @@ -375,10 +354,12 @@ namespace llvm { } DIArray getTypeArray() const { return getFieldAs<DIArray>(10); } + void setTypeArray(DIArray Elements, DIArray TParams = DIArray()); unsigned getRunTimeLang() const { return getUnsignedField(11); } DICompositeType getContainingType() const { return getFieldAs<DICompositeType>(12); } + void setContainingType(DICompositeType ContainingType); DIArray getTemplateParams() const { return getFieldAs<DIArray>(13); } /// Verify - Verify that a composite type descriptor is well formed. @@ -435,78 +416,66 @@ namespace llvm { StringRef getName() const { return getStringField(3); } StringRef getDisplayName() const { return getStringField(4); } StringRef getLinkageName() const { return getStringField(5); } - unsigned getLineNumber() const { return getUnsignedField(7); } - DICompositeType getType() const { return getFieldAs<DICompositeType>(8); } + unsigned getLineNumber() const { return getUnsignedField(6); } + DICompositeType getType() const { return getFieldAs<DICompositeType>(7); } /// getReturnTypeName - Subprogram return types are encoded either as /// DIType or as DICompositeType. StringRef getReturnTypeName() const { - DICompositeType DCT(getFieldAs<DICompositeType>(8)); + DICompositeType DCT(getFieldAs<DICompositeType>(7)); if (DCT.Verify()) { DIArray A = DCT.getTypeArray(); DIType T(A.getElement(0)); return T.getName(); } - DIType T(getFieldAs<DIType>(8)); + DIType T(getFieldAs<DIType>(7)); return T.getName(); } /// isLocalToUnit - Return true if this subprogram is local to the current /// compile unit, like 'static' in C. - unsigned isLocalToUnit() const { return getUnsignedField(9); } - unsigned isDefinition() const { return getUnsignedField(10); } + unsigned isLocalToUnit() const { return getUnsignedField(8); } + unsigned isDefinition() const { return getUnsignedField(9); } - unsigned getVirtuality() const { return getUnsignedField(11); } - unsigned getVirtualIndex() const { return getUnsignedField(12); } + unsigned getVirtuality() const { return getUnsignedField(10); } + unsigned getVirtualIndex() const { return getUnsignedField(11); } DICompositeType getContainingType() const { - return getFieldAs<DICompositeType>(13); + return getFieldAs<DICompositeType>(12); } unsigned getFlags() const { - return getUnsignedField(14); + return getUnsignedField(13); } unsigned isArtificial() const { - return (getUnsignedField(14) & FlagArtificial) != 0; + return (getUnsignedField(13) & FlagArtificial) != 0; } /// isPrivate - Return true if this subprogram has "private" /// access specifier. bool isPrivate() const { - return (getUnsignedField(14) & FlagPrivate) != 0; + return (getUnsignedField(13) & FlagPrivate) != 0; } /// isProtected - Return true if this subprogram has "protected" /// access specifier. bool isProtected() const { - return (getUnsignedField(14) & FlagProtected) != 0; + return (getUnsignedField(13) & FlagProtected) != 0; } /// isExplicit - Return true if this subprogram is marked as explicit. bool isExplicit() const { - return (getUnsignedField(14) & FlagExplicit) != 0; + return (getUnsignedField(13) & FlagExplicit) != 0; } /// isPrototyped - Return true if this subprogram is prototyped. bool isPrototyped() const { - return (getUnsignedField(14) & FlagPrototyped) != 0; + return (getUnsignedField(13) & FlagPrototyped) != 0; } unsigned isOptimized() const; - StringRef getFilename() const { - return getFieldAs<DIFile>(6).getFilename(); - } - - StringRef getDirectory() const { - return getFieldAs<DIFile>(6).getDirectory(); - } - - DIFile getFile() const { - return getFieldAs<DIFile>(6); - } - /// getScopeLineNumber - Get the beginning of the scope of the /// function, not necessarily where the name of the program /// starts. - unsigned getScopeLineNumber() const { return getUnsignedField(20); } + unsigned getScopeLineNumber() const { return getUnsignedField(19); } /// Verify - Verify that a subprogram descriptor is well formed. bool Verify() const; @@ -515,11 +484,11 @@ namespace llvm { /// information for the function F. bool describes(const Function *F); - Function *getFunction() const { return getFunctionField(16); } - void replaceFunction(Function *F) { replaceFunctionField(16, F); } - DIArray getTemplateParams() const { return getFieldAs<DIArray>(17); } + Function *getFunction() const { return getFunctionField(15); } + void replaceFunction(Function *F) { replaceFunctionField(15, F); } + DIArray getTemplateParams() const { return getFieldAs<DIArray>(16); } DISubprogram getFunctionDeclaration() const { - return getFieldAs<DISubprogram>(18); + return getFieldAs<DISubprogram>(17); } MDNode *getVariablesNodes() const; DIArray getVariables() const; @@ -623,17 +592,9 @@ namespace llvm { class DILexicalBlock : public DIScope { public: explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {} - DIScope getContext() const { return getFieldAs<DIScope>(1); } - unsigned getLineNumber() const { return getUnsignedField(2); } - unsigned getColumnNumber() const { return getUnsignedField(3); } - StringRef getDirectory() const { - StringRef dir = getFieldAs<DIFile>(4).getDirectory(); - return !dir.empty() ? dir : getContext().getDirectory(); - } - StringRef getFilename() const { - StringRef filename = getFieldAs<DIFile>(4).getFilename(); - return !filename.empty() ? filename : getContext().getFilename(); - } + DIScope getContext() const { return getFieldAs<DIScope>(2); } + unsigned getLineNumber() const { return getUnsignedField(3); } + unsigned getColumnNumber() const { return getUnsignedField(4); } bool Verify() const; }; @@ -645,31 +606,18 @@ namespace llvm { DIScope getContext() const { if (getScope().isSubprogram()) return getScope(); return getScope().getContext(); } unsigned getLineNumber() const { return getScope().getLineNumber(); } unsigned getColumnNumber() const { return getScope().getColumnNumber(); } - StringRef getDirectory() const { - StringRef dir = getFieldAs<DIFile>(2).getDirectory(); - return !dir.empty() ? dir : getContext().getDirectory(); - } - StringRef getFilename() const { - StringRef filename = getFieldAs<DIFile>(2).getFilename(); - assert(!filename.empty() && "Why'd you create this then?"); - return filename; - } - DILexicalBlock getScope() const { return getFieldAs<DILexicalBlock>(1); } + DILexicalBlock getScope() const { return getFieldAs<DILexicalBlock>(2); } bool Verify() const; }; /// DINameSpace - A wrapper for a C++ style name space. class DINameSpace : public DIScope { + friend class DIDescriptor; + void printInternal(raw_ostream &OS) const; public: explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {} - DIScope getContext() const { return getFieldAs<DIScope>(1); } - StringRef getName() const { return getStringField(2); } - StringRef getDirectory() const { - return getFieldAs<DIFile>(3).getDirectory(); - } - StringRef getFilename() const { - return getFieldAs<DIFile>(3).getFilename(); - } + DIScope getContext() const { return getFieldAs<DIScope>(2); } + StringRef getName() const { return getStringField(3); } unsigned getLineNumber() const { return getUnsignedField(4); } bool Verify() const; }; diff --git a/include/llvm/ExecutionEngine/GenericValue.h b/include/llvm/ExecutionEngine/GenericValue.h index e160e3aafd..0e92f79eba 100644 --- a/include/llvm/ExecutionEngine/GenericValue.h +++ b/include/llvm/ExecutionEngine/GenericValue.h @@ -24,21 +24,30 @@ typedef void* PointerTy; class APInt; struct GenericValue { + struct IntPair { + unsigned int first; + unsigned int second; + }; union { double DoubleVal; float FloatVal; PointerTy PointerVal; - struct { unsigned int first; unsigned int second; } UIntPairVal; + struct IntPair UIntPairVal; unsigned char Untyped[8]; }; - APInt IntVal; // also used for long doubles - - GenericValue() : DoubleVal(0.0), IntVal(1,0) {} + APInt IntVal; // also used for long doubles. + // For aggregate data types. + std::vector<GenericValue> AggregateVal; + + // to make code faster, set GenericValue to zero could be omitted, but it is + // potentially can cause problems, since GenericValue to store garbage + // instead of zero. + GenericValue() : IntVal(1,0) {UIntPairVal.first = 0; UIntPairVal.second = 0;} explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { } }; inline GenericValue PTOGV(void *P) { return GenericValue(P); } inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; } -} // End llvm namespace +} // End llvm namespace. #endif diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 30c0965337..074b38779a 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -226,11 +226,6 @@ private: explicit AttributeSet(AttributeSetImpl *LI) : pImpl(LI) {} public: AttributeSet() : pImpl(0) {} - AttributeSet(const AttributeSet &P) : pImpl(P.pImpl) {} - const AttributeSet &operator=(const AttributeSet &RHS) { - pImpl = RHS.pImpl; - return *this; - } //===--------------------------------------------------------------------===// // AttributeSet Construction and Mutation @@ -247,6 +242,11 @@ public: AttributeSet addAttribute(LLVMContext &C, unsigned Idx, Attribute::AttrKind Attr) const; + /// \brief Add an attribute to the attribute set at the given index. Since + /// attribute sets are immutable, this returns a new set. + AttributeSet addAttribute(LLVMContext &C, unsigned Idx, + StringRef Kind) const; + /// \brief Add attributes to the attribute set at the given index. Since /// attribute sets are immutable, this returns a new set. AttributeSet addAttributes(LLVMContext &C, unsigned Idx, diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index 5eb186d531..547d857b7b 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -22,6 +22,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" @@ -181,6 +183,10 @@ public: ~DataLayout(); // Not virtual, do not subclass this class + /// DataLayout is an immutable pass, but holds state. This allows the pass + /// manager to clear its mutable state. + bool doFinalization(Module &M); + /// Parse a data layout string (with fallback to default values). Ensure that /// the data layout pass is registered. void init(StringRef LayoutDescription); @@ -346,6 +352,10 @@ public: /// type. Type *getIntPtrType(Type *) const; + /// getSmallestLegalIntType - Return the smallest integer type with size at + /// least as big as Width bits. + Type *getSmallestLegalIntType(LLVMContext &C, unsigned Width = 0) const; + /// getIndexedOffset - return the offset from the beginning of the type for /// the specified indices. This is used to implement getelementptr. uint64_t getIndexedOffset(Type *Ty, ArrayRef<Value *> Indices) const; @@ -419,6 +429,49 @@ private: StructLayout(StructType *ST, const DataLayout &TD); }; + +// The implementation of this method is provided inline as it is particularly +// well suited to constant folding when called on a specific Type subclass. +inline uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { + assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); + switch (Ty->getTypeID()) { + case Type::LabelTyID: + return getPointerSizeInBits(0); + case Type::PointerTyID: + return getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()); + case Type::ArrayTyID: { + ArrayType *ATy = cast<ArrayType>(Ty); + return ATy->getNumElements() * + getTypeAllocSizeInBits(ATy->getElementType()); + } + case Type::StructTyID: + // Get the layout annotation... which is lazily created on demand. + return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); + case Type::IntegerTyID: + return cast<IntegerType>(Ty)->getBitWidth(); + case Type::HalfTyID: + return 16; + case Type::FloatTyID: + return 32; + case Type::DoubleTyID: + case Type::X86_MMXTyID: + return 64; + case Type::PPC_FP128TyID: + case Type::FP128TyID: + return 128; + // In memory objects this is always aligned to a higher boundary, but + // only 80 bits contain information. + case Type::X86_FP80TyID: + return 80; + case Type::VectorTyID: { + VectorType *VTy = cast<VectorType>(Ty); + return VTy->getNumElements() * getTypeSizeInBits(VTy->getElementType()); + } + default: + llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); + } +} + } // End llvm namespace #endif diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index a7dea5ca3d..f97929f658 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -181,6 +181,14 @@ public: AttributeSet::FunctionIndex, N)); } + /// addFnAttr - Add function attributes to this function. + /// + void addFnAttr(StringRef Kind) { + setAttributes( + AttributeSets.addAttribute(getContext(), + AttributeSet::FunctionIndex, Kind)); + } + /// \brief Return true if the function has the attribute. bool hasFnAttribute(Attribute::AttrKind Kind) const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Kind); diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index d2463c0efa..69e0ab4fa2 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2550,7 +2550,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". } //===----------------------------------------------------------------------===// -// RDRAND intrinsics. Return a random value and whether it is valid. +// RDRAND intrinsics - Return a random value and whether it is valid. +// RDSEED intrinsics - Return a NIST SP800-90B & C compliant random value and +// whether it is valid. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // These are declared side-effecting so they don't get eliminated by CSE or @@ -2558,6 +2560,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_rdrand_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>; def int_x86_rdrand_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>; def int_x86_rdrand_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; + def int_x86_rdseed_16 : Intrinsic<[llvm_i16_ty, llvm_i32_ty], [], []>; + def int_x86_rdseed_32 : Intrinsic<[llvm_i32_ty, llvm_i32_ty], [], []>; + def int_x86_rdseed_64 : Intrinsic<[llvm_i64_ty, llvm_i32_ty], [], []>; } //===----------------------------------------------------------------------===// @@ -2570,4 +2575,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [], []>; def int_x86_xabort : GCCBuiltin<"__builtin_ia32_xabort">, Intrinsic<[], [llvm_i8_ty], [IntrNoReturn]>; + def int_x86_xtest : GCCBuiltin<"__builtin_ia32_xtest">, + Intrinsic<[llvm_i32_ty], [], []>; } diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h index 33a69c4686..4bc7ce5000 100644 --- a/include/llvm/IR/Use.h +++ b/include/llvm/IR/Use.h @@ -149,14 +149,14 @@ private: // casting operators. template<> struct simplify_type<Use> { typedef Value* SimpleType; - static SimpleType getSimplifiedValue(const Use &Val) { - return static_cast<SimpleType>(Val.get()); + static SimpleType getSimplifiedValue(Use &Val) { + return Val.get(); } }; template<> struct simplify_type<const Use> { - typedef Value* SimpleType; + typedef /*const*/ Value* SimpleType; static SimpleType getSimplifiedValue(const Use &Val) { - return static_cast<SimpleType>(Val.get()); + return Val.get(); } }; diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index a4d0a5d7df..505bdeb178 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -183,27 +183,17 @@ public: template<> struct simplify_type<User::op_iterator> { typedef Value* SimpleType; - - static SimpleType getSimplifiedValue(const User::op_iterator &Val) { - return static_cast<SimpleType>(Val->get()); + static SimpleType getSimplifiedValue(User::op_iterator &Val) { + return Val->get(); } }; - -template<> struct simplify_type<const User::op_iterator> - : public simplify_type<User::op_iterator> {}; - template<> struct simplify_type<User::const_op_iterator> { - typedef Value* SimpleType; - - static SimpleType getSimplifiedValue(const User::const_op_iterator &Val) { - return static_cast<SimpleType>(Val->get()); + typedef /*const*/ Value* SimpleType; + static SimpleType getSimplifiedValue(User::const_op_iterator &Val) { + return Val->get(); } }; -template<> struct simplify_type<const User::const_op_iterator> - : public simplify_type<User::const_op_iterator> {}; - - // value_use_iterator::getOperandNo - Requires the definition of the User class. template<typename UserTy> unsigned value_use_iterator<UserTy>::getOperandNo() const { diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h new file mode 100644 index 0000000000..e2ae5f7164 --- /dev/null +++ b/include/llvm/IRReader/IRReader.h @@ -0,0 +1,55 @@ +//===---- llvm/IRReader/IRReader.h - Reader for LLVM IR files ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions for reading LLVM IR. They support both +// Bitcode and Assembly, automatically detecting the input format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IRREADER_IRREADER_H +#define LLVM_IRREADER_IRREADER_H + +#include <string> + +namespace llvm { + +class Module; +class MemoryBuffer; +class SMDiagnostic; +class LLVMContext; + +/// If the given MemoryBuffer holds a bitcode image, return a Module for it +/// which does lazy deserialization of function bodies. Otherwise, attempt to +/// parse it as LLVM Assembly and return a fully populated Module. This +/// function *always* takes ownership of the given MemoryBuffer. +Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, + LLVMContext &Context); + +/// If the given file holds a bitcode image, return a Module +/// for it which does lazy deserialization of function bodies. Otherwise, +/// attempt to parse it as LLVM Assembly and return a fully populated +/// Module. +Module *getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context); + +/// If the given MemoryBuffer holds a bitcode image, return a Module +/// for it. Otherwise, attempt to parse it as LLVM Assembly and return +/// a Module for it. This function *always* takes ownership of the given +/// MemoryBuffer. +Module *ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, LLVMContext &Context); + +/// If the given file holds a bitcode image, return a Module for it. +/// Otherwise, attempt to parse it as LLVM Assembly and return a Module +/// for it. +Module *ParseIRFile(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context); + +} + +#endif diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index a6dbb0c7d2..9cc194b424 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -91,7 +91,6 @@ void initializeCFGViewerPass(PassRegistry&); void initializeCalculateSpillWeightsPass(PassRegistry&); void initializeCallGraphAnalysisGroup(PassRegistry&); void initializeCodeGenPreparePass(PassRegistry&); -void initializeCodePlacementOptPass(PassRegistry&); void initializeConstantMergePass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); void initializeMachineCopyPropagationPass(PassRegistry&); diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h index 8bf9efa8e2..679638427d 100644 --- a/include/llvm/Linker.h +++ b/include/llvm/Linker.h @@ -6,10 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file defines the interface to the module/file/archive linker. -// -//===----------------------------------------------------------------------===// #ifndef LLVM_LINKER_H #define LLVM_LINKER_H @@ -19,7 +15,6 @@ #include <vector> namespace llvm { - namespace sys { class Path; } class Module; class LLVMContext; @@ -31,8 +26,7 @@ class StringRef; /// In this case the Linker still retains ownership of the Module. If the /// releaseModule() method is used, the ownership of the Module is transferred /// to the caller and the Linker object is only suitable for destruction. -/// The Linker can link Modules from memory. It retains a set of search paths -/// in which to find any libraries presented to it. By default, the linker +/// The Linker can link Modules from memory. By default, the linker /// will generate error and warning messages to stderr but this capability can /// be turned off with the QuietWarnings and QuietErrors flags. It can also be /// instructed to verbosely print out the linking actions it is taking with @@ -96,16 +90,10 @@ class Linker { /// must arrange for its destruct. After this method is called, the Linker /// terminates the linking session for the returned Module. It will no /// longer utilize the returned Module but instead resets itself for - /// subsequent linking as if the constructor had been called. The Linker's - /// LibPaths and flags to be reset, and memory will be released. + /// subsequent linking as if the constructor had been called. /// @brief Release the linked/composite module. Module* releaseModule(); - /// This method gets the list of libraries that form the path that the - /// Linker will search when it is presented with a library name. - /// @brief Get the Linkers library path - const std::vector<sys::Path>& getLibPaths() const { return LibPaths; } - /// This method returns an error string suitable for printing to the user. /// The return value will be empty unless an error occurred in one of the /// LinkIn* methods. In those cases, the LinkIn* methods will have returned @@ -120,31 +108,6 @@ class Linker { /// @name Mutators /// @{ public: - /// Add a path to the list of paths that the Linker will search. The Linker - /// accumulates the set of libraries added - /// library paths for the target platform. The standard libraries will - /// always be searched last. The added libraries will be searched in the - /// order added. - /// @brief Add a path. - void addPath(const sys::Path& path); - - /// Add a set of paths to the list of paths that the linker will search. The - /// Linker accumulates the set of libraries added. The \p paths will be - /// added to the end of the Linker's list. Order will be retained. - /// @brief Add a set of paths. - void addPaths(const std::vector<std::string>& paths); - - /// This method augments the Linker's list of library paths with the system - /// paths of the host operating system, include LLVM_LIB_SEARCH_PATH. - /// @brief Add the system paths. - void addSystemPaths(); - - /// Control optional linker behavior by setting a group of flags. The flags - /// are defined in the ControlFlags enumeration. - /// @see ControlFlags - /// @brief Set control flags. - void setFlags(unsigned flags) { Flags = flags; } - /// This method links the \p Src module into the Linker's Composite module /// by calling LinkModules. /// @see LinkModules @@ -185,7 +148,6 @@ class Linker { private: LLVMContext& Context; ///< The context for global information Module* Composite; ///< The composite module linked together - std::vector<sys::Path> LibPaths; ///< The library search paths unsigned Flags; ///< Flags to control optional behavior. std::string Error; ///< Text of error that occurred. std::string ProgramName; ///< Name of the program being linked diff --git a/include/llvm/MC/MCAtom.h b/include/llvm/MC/MCAtom.h index 682cf7cd76..ae5bf0bc20 100644 --- a/include/llvm/MC/MCAtom.h +++ b/include/llvm/MC/MCAtom.h @@ -46,8 +46,8 @@ class MCAtom { : Type(T), Parent(P), Begin(B), End(E) { } public: - bool isTextAtom() { return Type == TextAtom; } - bool isDataAtom() { return Type == DataAtom; } + bool isTextAtom() const { return Type == TextAtom; } + bool isDataAtom() const { return Type == DataAtom; } void addInst(const MCInst &I, uint64_t Address, unsigned Size); void addData(const MCData &D); diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 1251c5fbe2..0db3dee2ff 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -11,6 +11,7 @@ #define LLVM_MC_MCCONTEXT_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/SectionKind.h" @@ -105,9 +106,9 @@ namespace llvm { /// We now emit a line table for each compile unit. To reduce the prologue /// size of each line table, the files and directories used by each compile /// unit are separated. - typedef std::map<unsigned, std::vector<MCDwarfFile *> > MCDwarfFilesMap; + typedef std::map<unsigned, SmallVector<MCDwarfFile *, 4> > MCDwarfFilesMap; MCDwarfFilesMap MCDwarfFilesCUMap; - std::map<unsigned, std::vector<StringRef> > MCDwarfDirsCUMap; + std::map<unsigned, SmallVector<StringRef, 4> > MCDwarfDirsCUMap; /// The current dwarf line information from the last dwarf .loc directive. MCDwarfLoc CurrentDwarfLoc; @@ -301,10 +302,10 @@ namespace llvm { return false; } - const std::vector<MCDwarfFile *> &getMCDwarfFiles(unsigned CUID = 0) { + const SmallVectorImpl<MCDwarfFile *> &getMCDwarfFiles(unsigned CUID = 0) { return MCDwarfFilesCUMap[CUID]; } - const std::vector<StringRef> &getMCDwarfDirs(unsigned CUID = 0) { + const SmallVectorImpl<StringRef> &getMCDwarfDirs(unsigned CUID = 0) { return MCDwarfDirsCUMap[CUID]; } diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h index c78cd976f2..4650bf21be 100644 --- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h +++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h @@ -57,19 +57,12 @@ public: /// isMem - Is this a memory operand? virtual bool isMem() const = 0; - virtual unsigned getMemSize() const { return 0; } /// getStartLoc - Get the location of the first token of this operand. virtual SMLoc getStartLoc() const = 0; /// getEndLoc - Get the location of the last token of this operand. virtual SMLoc getEndLoc() const = 0; - /// needAsmRewrite - AsmRewrites happen in both the target-independent and - /// target-dependent parsers. The target-independent parser calls this - /// function to determine if the target-dependent parser has already taken - /// care of the rewrites. Only valid when parsing MS-style inline assembly. - virtual bool needAsmRewrite() const { return true; } - /// needAddressOf - Do we need to emit code to get the address of the /// variable/label? Only valid when parsing MS-style inline assembly. virtual bool needAddressOf() const { return false; } @@ -82,10 +75,6 @@ public: /// getOffsetOfLoc - Get the location of the offset operator. virtual SMLoc getOffsetOfLoc() const { return SMLoc(); } - /// needSizeDirective - Do we need to emit a sizing directive for this - /// operand? Only valid when parsing MS-style inline assembly. - virtual bool needSizeDirective() const { return false; } - /// print - Print a debug representation of the operand to the given stream. virtual void print(raw_ostream &OS) const = 0; /// dump - Print to the debug stream. diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index 36e35f5748..b0d8663b7d 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -1634,6 +1634,63 @@ error_code ELFObjectFile<ELFT>::getRelocationTypeName( res = "Unknown"; } break; + case ELF::EM_MIPS: + switch (type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LITERAL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT5); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_OFST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SUB); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_A); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_B); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_DELETE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHER); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHEST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SCN_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_ADD_IMMEDIATE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PJUMP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_RELGOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JALR); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_LDM); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GOTTPREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT); + default: + res = "Unknown"; + } + break; case ELF::EM_AARCH64: switch (type) { LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE); @@ -2303,8 +2360,9 @@ ELFObjectFile<ELFT>::end_dynamic_table(bool NULLEnd) const { if (NULLEnd) { Elf_Dyn_iterator Start = begin_dynamic_table(); - for (; Start != Ret && Start->getTag() != ELF::DT_NULL; ++Start) - ; + while (Start != Ret && Start->getTag() != ELF::DT_NULL) + ++Start; + // Include the DT_NULL. if (Start != Ret) ++Start; @@ -2321,10 +2379,9 @@ StringRef ELFObjectFile<ELFT>::getLoadName() const { // Find the DT_SONAME entry Elf_Dyn_iterator it = begin_dynamic_table(); Elf_Dyn_iterator ie = end_dynamic_table(); - for (; it != ie; ++it) { - if (it->getTag() == ELF::DT_SONAME) - break; - } + while (it != ie && it->getTag() != ELF::DT_SONAME) + ++it; + if (it != ie) { if (dot_dynstr_sec == NULL) report_fatal_error("Dynamic string table is missing"); @@ -2341,10 +2398,8 @@ library_iterator ELFObjectFile<ELFT>::begin_libraries_needed() const { // Find the first DT_NEEDED entry Elf_Dyn_iterator i = begin_dynamic_table(); Elf_Dyn_iterator e = end_dynamic_table(); - for (; i != e; ++i) { - if (i->getTag() == ELF::DT_NEEDED) - break; - } + while (i != e && i->getTag() != ELF::DT_NEEDED) + ++i; DataRefImpl DRI; DRI.p = reinterpret_cast<uintptr_t>(i.get()); @@ -2359,11 +2414,10 @@ error_code ELFObjectFile<ELFT>::getLibraryNext(DataRefImpl Data, reinterpret_cast<const char *>(Data.p)); Elf_Dyn_iterator e = end_dynamic_table(); - // Skip the current dynamic table entry. - ++i; - - // Find the next DT_NEEDED entry. - for (; i != e && i->getTag() != ELF::DT_NEEDED; ++i); + // Skip the current dynamic table entry and find the next DT_NEEDED entry. + do + ++i; + while (i != e && i->getTag() != ELF::DT_NEEDED); DataRefImpl DRI; DRI.p = reinterpret_cast<uintptr_t>(i.get()); @@ -2704,6 +2758,21 @@ static inline error_code GetELFSymbolVersion(const ObjectFile *Obj, llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF"); } +/// This function returns the hash value for a symbol in the .dynsym section +/// Name of the API remains consistent as specified in the libelf +/// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash +static inline unsigned elf_hash(StringRef &symbolName) { + unsigned h = 0, g; + for (unsigned i = 0, j = symbolName.size(); i < j; i++) { + h = (h << 4) + symbolName[i]; + g = h & 0xf0000000L; + if (g != 0) + h ^= g >> 24; + h &= ~g; + } + return h; +} + } } diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h index a9fb65cba9..265b886daf 100644 --- a/include/llvm/Support/CFG.h +++ b/include/llvm/Support/CFG.h @@ -27,8 +27,9 @@ namespace llvm { template <class Ptr, class USE_iterator> // Predecessor Iterator class PredIterator : public std::iterator<std::forward_iterator_tag, - Ptr, ptrdiff_t> { - typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t> super; + Ptr, ptrdiff_t, Ptr*, Ptr*> { + typedef std::iterator<std::forward_iterator_tag, Ptr, ptrdiff_t, Ptr*, + Ptr*> super; typedef PredIterator<Ptr, USE_iterator> Self; USE_iterator It; @@ -40,6 +41,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag, public: typedef typename super::pointer pointer; + typedef typename super::reference reference; PredIterator() {} explicit inline PredIterator(Ptr *bb) : It(bb->use_begin()) { @@ -50,7 +52,7 @@ public: inline bool operator==(const Self& x) const { return It == x.It; } inline bool operator!=(const Self& x) const { return !operator==(x); } - inline pointer operator*() const { + inline reference operator*() const { assert(!It.atEnd() && "pred_iterator out of range!"); return cast<TerminatorInst>(*It)->getParent(); } @@ -100,10 +102,11 @@ inline const_pred_iterator pred_end(const BasicBlock *BB) { template <class Term_, class BB_> // Successor Iterator class SuccIterator : public std::iterator<std::bidirectional_iterator_tag, - BB_, ptrdiff_t> { + BB_, ptrdiff_t, BB_*, BB_*> { const Term_ Term; unsigned idx; - typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t> super; + typedef std::iterator<std::bidirectional_iterator_tag, BB_, ptrdiff_t, BB_*, + BB_*> super; typedef SuccIterator<Term_, BB_> Self; inline bool index_is_valid(int idx) { @@ -112,6 +115,7 @@ class SuccIterator : public std::iterator<std::bidirectional_iterator_tag, public: typedef typename super::pointer pointer; + typedef typename super::reference reference; // TODO: This can be random access iterator, only operator[] missing. explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator @@ -142,7 +146,7 @@ public: inline bool operator==(const Self& x) const { return idx == x.idx; } inline bool operator!=(const Self& x) const { return !operator==(x); } - inline pointer operator*() const { return Term->getSuccessor(idx); } + inline reference operator*() const { return Term->getSuccessor(idx); } inline pointer operator->() const { return operator*(); } inline Self& operator++() { ++idx; return *this; } // Preincrement diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h index 3ce0fef4a0..92107ac025 100644 --- a/include/llvm/Support/CallSite.h +++ b/include/llvm/Support/CallSite.h @@ -28,7 +28,6 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Instructions.h" diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h index 80f09db4f7..0d2d6c92fd 100644 --- a/include/llvm/Support/Casting.h +++ b/include/llvm/Support/Casting.h @@ -36,9 +36,13 @@ template<typename From> struct simplify_type { }; template<typename From> struct simplify_type<const From> { - typedef const From SimpleType; - static SimpleType &getSimplifiedValue(const From &Val) { - return simplify_type<From>::getSimplifiedValue(static_cast<From&>(Val)); + typedef typename simplify_type<From>::SimpleType NonConstSimpleType; + typedef typename add_const_past_pointer<NonConstSimpleType>::type + SimpleType; + typedef typename add_lvalue_reference_if_not_pointer<SimpleType>::type + RetType; + static RetType getSimplifiedValue(const From& Val) { + return simplify_type<From>::getSimplifiedValue(const_cast<From&>(Val)); } }; @@ -81,6 +85,13 @@ template <typename To, typename From> struct isa_impl_cl<To, From*> { } }; +template <typename To, typename From> struct isa_impl_cl<To, From*const> { + static inline bool doit(const From *Val) { + assert(Val && "isa<> used on a null pointer"); + return isa_impl<To, From>::doit(*Val); + } +}; + template <typename To, typename From> struct isa_impl_cl<To, const From*> { static inline bool doit(const From *Val) { assert(Val && "isa<> used on a null pointer"); @@ -102,7 +113,7 @@ struct isa_impl_wrap { static bool doit(const From &Val) { return isa_impl_wrap<To, SimpleFrom, typename simplify_type<SimpleFrom>::SimpleType>::doit( - simplify_type<From>::getSimplifiedValue(Val)); + simplify_type<const From>::getSimplifiedValue(Val)); } }; @@ -121,7 +132,8 @@ struct isa_impl_wrap<To, FromTy, FromTy> { // template <class X, class Y> inline bool isa(const Y &Val) { - return isa_impl_wrap<X, Y, typename simplify_type<Y>::SimpleType>::doit(Val); + return isa_impl_wrap<X, const Y, + typename simplify_type<const Y>::SimpleType>::doit(Val); } //===----------------------------------------------------------------------===// @@ -178,7 +190,7 @@ struct cast_retty { // template<class To, class From, class SimpleFrom> struct cast_convert_val { // This is not a simple type, use the template to simplify it... - static typename cast_retty<To, From>::ret_type doit(const From &Val) { + static typename cast_retty<To, From>::ret_type doit(From &Val) { return cast_convert_val<To, SimpleFrom, typename simplify_type<SimpleFrom>::SimpleType>::doit( simplify_type<From>::getSimplifiedValue(Val)); @@ -204,20 +216,14 @@ template<class To, class FromTy> struct cast_convert_val<To,FromTy,FromTy> { // cast<Instruction>(myVal)->getParent() // template <class X, class Y> -inline typename enable_if_c< - !is_same<Y, typename simplify_type<Y>::SimpleType>::value, - typename cast_retty<X, Y>::ret_type ->::type cast(const Y &Val) { +inline typename cast_retty<X, const Y>::ret_type cast(const Y &Val) { assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!"); - return cast_convert_val<X, Y, - typename simplify_type<Y>::SimpleType>::doit(Val); + return cast_convert_val<X, const Y, + typename simplify_type<const Y>::SimpleType>::doit(Val); } template <class X, class Y> -inline typename enable_if< - is_same<Y, typename simplify_type<Y>::SimpleType>, - typename cast_retty<X, Y>::ret_type ->::type cast(Y &Val) { +inline typename cast_retty<X, Y>::ret_type cast(Y &Val) { assert(isa<X>(Val) && "cast<Ty>() argument of incompatible type!"); return cast_convert_val<X, Y, typename simplify_type<Y>::SimpleType>::doit(Val); @@ -253,18 +259,12 @@ inline typename cast_retty<X, Y*>::ret_type cast_or_null(Y *Val) { // template <class X, class Y> -inline typename enable_if_c< - !is_same<Y, typename simplify_type<Y>::SimpleType>::value, - typename cast_retty<X, Y>::ret_type ->::type dyn_cast(const Y &Val) { +inline typename cast_retty<X, const Y>::ret_type dyn_cast(const Y &Val) { return isa<X>(Val) ? cast<X>(Val) : 0; } template <class X, class Y> -inline typename enable_if< - is_same<Y, typename simplify_type<Y>::SimpleType>, - typename cast_retty<X, Y>::ret_type ->::type dyn_cast(Y &Val) { +inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) { return isa<X>(Val) ? cast<X>(Val) : 0; } diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 25f42a98e7..13d057be04 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -351,4 +351,14 @@ #define LLVM_EXPLICIT #endif +/// \macro LLVM_STATIC_ASSERT +/// \brief Expands to C/C++'s static_assert on compilers which support it. +#if __has_feature(cxx_static_assert) +# define LLVM_STATIC_ASSERT(expr, msg) static_assert(expr, msg) +#elif __has_feature(c_static_assert) +# define LLVM_STATIC_ASSERT(expr, msg) _Static_assert(expr, msg) +#else +# define LLVM_STATIC_ASSERT(expr, msg) +#endif + #endif diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h index 3596be87e3..f35d407292 100644 --- a/include/llvm/Support/DebugLoc.h +++ b/include/llvm/Support/DebugLoc.h @@ -9,7 +9,7 @@ // // This file defines a number of light weight data structures used // to describe and track debug location information. -// +// //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_DEBUGLOC_H @@ -19,7 +19,7 @@ namespace llvm { template <typename T> struct DenseMapInfo; class MDNode; class LLVMContext; - + /// DebugLoc - Debug location id. This is carried by Instruction, SDNode, /// and MachineInstr to compactly encode file/line/scope information for an /// operation. @@ -46,18 +46,18 @@ namespace llvm { /// location, encoded as 24-bits for line and 8 bits for col. A value of 0 /// for either means unknown. unsigned LineCol; - + /// ScopeIdx - This is an opaque ID# for Scope/InlinedAt information, /// decoded by LLVMContext. 0 is unknown. int ScopeIdx; public: DebugLoc() : LineCol(0), ScopeIdx(0) {} // Defaults to unknown. - + /// get - Get a new DebugLoc that corresponds to the specified line/col /// scope/inline location. static DebugLoc get(unsigned Line, unsigned Col, MDNode *Scope, MDNode *InlinedAt = 0); - + /// getFromDILocation - Translate the DILocation quad into a DebugLoc. static DebugLoc getFromDILocation(MDNode *N); @@ -66,32 +66,32 @@ namespace llvm { /// isUnknown - Return true if this is an unknown location. bool isUnknown() const { return ScopeIdx == 0; } - + unsigned getLine() const { return (LineCol << 8) >> 8; // Mask out column. } - + unsigned getCol() const { return LineCol >> 24; } - + /// getScope - This returns the scope pointer for this DebugLoc, or null if /// invalid. MDNode *getScope(const LLVMContext &Ctx) const; - + /// getInlinedAt - This returns the InlinedAt pointer for this DebugLoc, or /// null if invalid or not present. MDNode *getInlinedAt(const LLVMContext &Ctx) const; - + /// getScopeAndInlinedAt - Return both the Scope and the InlinedAt values. void getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA, const LLVMContext &Ctx) const; - - + + /// getAsMDNode - This method converts the compressed DebugLoc node into a /// DILocation compatible MDNode. MDNode *getAsMDNode(const LLVMContext &Ctx) const; - + bool operator==(const DebugLoc &DL) const { return LineCol == DL.LineCol && ScopeIdx == DL.ScopeIdx; } diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index 8e6b91e00e..ea597fc1a2 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -480,6 +480,8 @@ enum { R_PPC64_TOC16_LO = 48, R_PPC64_TOC16_HA = 50, R_PPC64_TOC = 51, + R_PPC64_ADDR16_DS = 56, + R_PPC64_ADDR16_LO_DS = 57, R_PPC64_TOC16_DS = 63, R_PPC64_TOC16_LO_DS = 64, R_PPC64_TLS = 67, @@ -588,6 +590,8 @@ enum { // ARM Specific e_flags enum { + EF_ARM_SOFT_FLOAT = 0x00000200U, + EF_ARM_VFP_FLOAT = 0x00000400U, EF_ARM_EABI_UNKNOWN = 0x00000000U, EF_ARM_EABI_VER1 = 0x01000000U, EF_ARM_EABI_VER2 = 0x02000000U, diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h index ca5dec0173..b948d97bff 100644 --- a/include/llvm/Support/ErrorHandling.h +++ b/include/llvm/Support/ErrorHandling.h @@ -24,7 +24,8 @@ namespace llvm { /// An error handler callback. typedef void (*fatal_error_handler_t)(void *user_data, - const std::string& reason); + const std::string& reason, + bool gen_crash_diag); /// install_fatal_error_handler - Installs a new error handler to be used /// whenever a serious (non-recoverable) error is encountered by LLVM. @@ -73,10 +74,14 @@ namespace llvm { /// standard error, followed by a newline. /// After the error handler is called this function will call exit(1), it /// does not return. - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason); + LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, + bool gen_crash_diag = true); + LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason, + bool gen_crash_diag = true); + LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, + bool gen_crash_diag = true); + LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, + bool gen_crash_diag = true); /// This function calls abort(), and prints the optional message to stderr. /// Use the llvm_unreachable macro (that adds location info), instead of diff --git a/include/llvm/Support/FEnv.h b/include/llvm/Support/FEnv.h index d9cf7247a3..8560ee0a8a 100644 --- a/include/llvm/Support/FEnv.h +++ b/include/llvm/Support/FEnv.h @@ -32,7 +32,7 @@ namespace sys { /// llvm_fenv_clearexcept - Clear the floating-point exception state. static inline void llvm_fenv_clearexcept() { -#ifdef HAVE_FENV_H +#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT feclearexcept(FE_ALL_EXCEPT); #endif errno = 0; @@ -43,7 +43,7 @@ static inline bool llvm_fenv_testexcept() { int errno_val = errno; if (errno_val == ERANGE || errno_val == EDOM) return true; -#ifdef HAVE_FENV_H +#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) return true; #endif diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index bae3a5a608..ffa642787b 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -602,7 +602,7 @@ private: void *FileMappingHandle; #endif - error_code init(int FD, uint64_t Offset); + error_code init(int FD, bool CloseFD, uint64_t Offset); public: typedef char char_type; @@ -633,8 +633,10 @@ public: error_code &ec); /// \param fd An open file descriptor to map. mapped_file_region takes - /// ownership. It must have been opended in the correct mode. + /// ownership if closefd is true. It must have been opended in the correct + /// mode. mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, diff --git a/include/llvm/Support/FormattedStream.h b/include/llvm/Support/FormattedStream.h index 21635dcfb6..2e4bd5aeca 100644 --- a/include/llvm/Support/FormattedStream.h +++ b/include/llvm/Support/FormattedStream.h @@ -17,125 +17,125 @@ #include "llvm/Support/raw_ostream.h" -namespace llvm -{ - /// formatted_raw_ostream - Formatted raw_fd_ostream to handle - /// asm-specific constructs. +namespace llvm { + +/// formatted_raw_ostream - A raw_ostream that wraps another one and keeps track +/// of column position, allowing padding out to specific column boundaries. +/// +class formatted_raw_ostream : public raw_ostream { +public: + /// DELETE_STREAM - Tell the destructor to delete the held stream. /// - class formatted_raw_ostream : public raw_ostream { - public: - /// DELETE_STREAM - Tell the destructor to delete the held stream. - /// - static const bool DELETE_STREAM = true; - - /// PRESERVE_STREAM - Tell the destructor to not delete the held - /// stream. - /// - static const bool PRESERVE_STREAM = false; - - private: - /// TheStream - The real stream we output to. We set it to be - /// unbuffered, since we're already doing our own buffering. - /// - raw_ostream *TheStream; - - /// DeleteStream - Do we need to delete TheStream in the - /// destructor? - /// - bool DeleteStream; - - /// ColumnScanned - The current output column of the data that's - /// been flushed and the portion of the buffer that's been - /// scanned. The column scheme is zero-based. - /// - unsigned ColumnScanned; - - /// Scanned - This points to one past the last character in the - /// buffer we've scanned. - /// - const char *Scanned; - - virtual void write_impl(const char *Ptr, size_t Size) LLVM_OVERRIDE; - - /// current_pos - Return the current position within the stream, - /// not counting the bytes currently in the buffer. - virtual uint64_t current_pos() const LLVM_OVERRIDE { - // Our current position in the stream is all the contents which have been - // written to the underlying stream (*not* the current position of the - // underlying stream). - return TheStream->tell(); - } - - /// ComputeColumn - Examine the given output buffer and figure out which - /// column we end up in after output. - /// - void ComputeColumn(const char *Ptr, size_t size); - - public: - /// formatted_raw_ostream - Open the specified file for - /// writing. If an error occurs, information about the error is - /// put into ErrorInfo, and the stream should be immediately - /// destroyed; the string will be empty if no error occurred. - /// - /// As a side effect, the given Stream is set to be Unbuffered. - /// This is because formatted_raw_ostream does its own buffering, - /// so it doesn't want another layer of buffering to be happening - /// underneath it. - /// - formatted_raw_ostream(raw_ostream &Stream, bool Delete = false) - : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { - setStream(Stream, Delete); - } - explicit formatted_raw_ostream() - : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { - Scanned = 0; - } - - ~formatted_raw_ostream() { - flush(); - releaseStream(); - } - - void setStream(raw_ostream &Stream, bool Delete = false) { - releaseStream(); - - TheStream = &Stream; - DeleteStream = Delete; - - // This formatted_raw_ostream inherits from raw_ostream, so it'll do its - // own buffering, and it doesn't need or want TheStream to do another - // layer of buffering underneath. Resize the buffer to what TheStream - // had been using, and tell TheStream not to do its own buffering. - if (size_t BufferSize = TheStream->GetBufferSize()) - SetBufferSize(BufferSize); - else - SetUnbuffered(); - TheStream->SetUnbuffered(); + static const bool DELETE_STREAM = true; + + /// PRESERVE_STREAM - Tell the destructor to not delete the held + /// stream. + /// + static const bool PRESERVE_STREAM = false; + +private: + /// TheStream - The real stream we output to. We set it to be + /// unbuffered, since we're already doing our own buffering. + /// + raw_ostream *TheStream; - Scanned = 0; - } - - /// PadToColumn - Align the output to some column number. If the current - /// column is already equal to or more than NewCol, PadToColumn inserts one - /// space. - /// - /// \param NewCol - The column to move to. - formatted_raw_ostream &PadToColumn(unsigned NewCol); - - private: - void releaseStream() { - // Delete the stream if needed. Otherwise, transfer the buffer - // settings from this raw_ostream back to the underlying stream. - if (!TheStream) - return; - if (DeleteStream) - delete TheStream; - else if (size_t BufferSize = GetBufferSize()) - TheStream->SetBufferSize(BufferSize); - else - TheStream->SetUnbuffered(); - } - }; + /// DeleteStream - Do we need to delete TheStream in the + /// destructor? + /// + bool DeleteStream; + + /// ColumnScanned - The current output column of the data that's + /// been flushed and the portion of the buffer that's been + /// scanned. The column scheme is zero-based. + /// + unsigned ColumnScanned; + + /// Scanned - This points to one past the last character in the + /// buffer we've scanned. + /// + const char *Scanned; + + virtual void write_impl(const char *Ptr, size_t Size) LLVM_OVERRIDE; + + /// current_pos - Return the current position within the stream, + /// not counting the bytes currently in the buffer. + virtual uint64_t current_pos() const LLVM_OVERRIDE { + // Our current position in the stream is all the contents which have been + // written to the underlying stream (*not* the current position of the + // underlying stream). + return TheStream->tell(); + } + + /// ComputeColumn - Examine the given output buffer and figure out which + /// column we end up in after output. + /// + void ComputeColumn(const char *Ptr, size_t size); + +public: + /// formatted_raw_ostream - Open the specified file for + /// writing. If an error occurs, information about the error is + /// put into ErrorInfo, and the stream should be immediately + /// destroyed; the string will be empty if no error occurred. + /// + /// As a side effect, the given Stream is set to be Unbuffered. + /// This is because formatted_raw_ostream does its own buffering, + /// so it doesn't want another layer of buffering to be happening + /// underneath it. + /// + formatted_raw_ostream(raw_ostream &Stream, bool Delete = false) + : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { + setStream(Stream, Delete); + } + explicit formatted_raw_ostream() + : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { + Scanned = 0; + } + + ~formatted_raw_ostream() { + flush(); + releaseStream(); + } + + void setStream(raw_ostream &Stream, bool Delete = false) { + releaseStream(); + + TheStream = &Stream; + DeleteStream = Delete; + + // This formatted_raw_ostream inherits from raw_ostream, so it'll do its + // own buffering, and it doesn't need or want TheStream to do another + // layer of buffering underneath. Resize the buffer to what TheStream + // had been using, and tell TheStream not to do its own buffering. + if (size_t BufferSize = TheStream->GetBufferSize()) + SetBufferSize(BufferSize); + else + SetUnbuffered(); + TheStream->SetUnbuffered(); + + Scanned = 0; + } + + /// PadToColumn - Align the output to some column number. If the current + /// column is already equal to or more than NewCol, PadToColumn inserts one + /// space. + /// + /// \param NewCol - The column to move to. + formatted_raw_ostream &PadToColumn(unsigned NewCol); + +private: + void releaseStream() { + // Delete the stream if needed. Otherwise, transfer the buffer + // settings from this raw_ostream back to the underlying stream. + if (!TheStream) + return; + if (DeleteStream) + delete TheStream; + else if (size_t BufferSize = GetBufferSize()) + TheStream->SetBufferSize(BufferSize); + else + TheStream->SetUnbuffered(); + } +}; /// fouts() - This returns a reference to a formatted_raw_ostream for /// standard output. Use it like: fouts() << "foo" << "bar"; diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h deleted file mode 100644 index 6d8a9b30ae..0000000000 --- a/include/llvm/Support/IRReader.h +++ /dev/null @@ -1,112 +0,0 @@ -//===---- llvm/Support/IRReader.h - Reader for LLVM IR files ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines functions for reading LLVM IR. They support both -// Bitcode and Assembly, automatically detecting the input format. -// -// These functions must be defined in a header file in order to avoid -// library dependencies, since they reference both Bitcode and Assembly -// functions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_IRREADER_H -#define LLVM_SUPPORT_IRREADER_H - -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Assembly/Parser.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/system_error.h" - -namespace llvm { - - /// If the given MemoryBuffer holds a bitcode image, return a Module for it - /// which does lazy deserialization of function bodies. Otherwise, attempt to - /// parse it as LLVM Assembly and return a fully populated Module. This - /// function *always* takes ownership of the given MemoryBuffer. - inline Module *getLazyIRModule(MemoryBuffer *Buffer, - SMDiagnostic &Err, - LLVMContext &Context) { - if (isBitcode((const unsigned char *)Buffer->getBufferStart(), - (const unsigned char *)Buffer->getBufferEnd())) { - std::string ErrMsg; - Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg); - if (M == 0) { - Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, - ErrMsg); - // ParseBitcodeFile does not take ownership of the Buffer in the - // case of an error. - delete Buffer; - } - return M; - } - - return ParseAssembly(Buffer, 0, Err, Context); - } - - /// If the given file holds a bitcode image, return a Module - /// for it which does lazy deserialization of function bodies. Otherwise, - /// attempt to parse it as LLVM Assembly and return a fully populated - /// Module. - inline Module *getLazyIRFileModule(const std::string &Filename, - SMDiagnostic &Err, - LLVMContext &Context) { - OwningPtr<MemoryBuffer> File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { - Err = SMDiagnostic(Filename, SourceMgr::DK_Error, - "Could not open input file: " + ec.message()); - return 0; - } - - return getLazyIRModule(File.take(), Err, Context); - } - - /// If the given MemoryBuffer holds a bitcode image, return a Module - /// for it. Otherwise, attempt to parse it as LLVM Assembly and return - /// a Module for it. This function *always* takes ownership of the given - /// MemoryBuffer. - inline Module *ParseIR(MemoryBuffer *Buffer, - SMDiagnostic &Err, - LLVMContext &Context) { - if (isBitcode((const unsigned char *)Buffer->getBufferStart(), - (const unsigned char *)Buffer->getBufferEnd())) { - std::string ErrMsg; - Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg); - if (M == 0) - Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, - ErrMsg); - // ParseBitcodeFile does not take ownership of the Buffer. - delete Buffer; - return M; - } - - return ParseAssembly(Buffer, 0, Err, Context); - } - - /// If the given file holds a bitcode image, return a Module for it. - /// Otherwise, attempt to parse it as LLVM Assembly and return a Module - /// for it. - inline Module *ParseIRFile(const std::string &Filename, - SMDiagnostic &Err, - LLVMContext &Context) { - OwningPtr<MemoryBuffer> File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { - Err = SMDiagnostic(Filename, SourceMgr::DK_Error, - "Could not open input file: " + ec.message()); - return 0; - } - - return ParseIR(File.take(), Err, Context); - } - -} - -#endif diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h index a0cc27c024..bf650112f2 100644 --- a/include/llvm/Support/Program.h +++ b/include/llvm/Support/Program.h @@ -125,7 +125,8 @@ namespace sys { const sys::Path** redirects = 0, unsigned secondsToWait = 0, unsigned memoryLimit = 0, - std::string* ErrMsg = 0); + std::string* ErrMsg = 0, + bool *ExecutionFailed = 0); /// A convenience function equivalent to Program prg; prg.Execute(..); /// @see Execute diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h index db44995d95..b49341c3ff 100644 --- a/include/llvm/Support/ValueHandle.h +++ b/include/llvm/Support/ValueHandle.h @@ -20,6 +20,7 @@ namespace llvm { class ValueHandleBase; +template<typename From> struct simplify_type; // ValueHandleBase** is only 4-byte aligned. template<> @@ -162,14 +163,12 @@ public: // Specialize simplify_type to allow WeakVH to participate in // dyn_cast, isa, etc. -template<typename From> struct simplify_type; -template<> struct simplify_type<const WeakVH> { +template<> struct simplify_type<WeakVH> { typedef Value* SimpleType; - static SimpleType getSimplifiedValue(const WeakVH &WVH) { - return static_cast<Value *>(WVH); + static SimpleType getSimplifiedValue(WeakVH &WVH) { + return WVH; } }; -template<> struct simplify_type<WeakVH> : public simplify_type<const WeakVH> {}; /// AssertingVH - This is a Value Handle that points to a value and asserts out /// if the value is destroyed while the handle is still live. This is very @@ -236,18 +235,6 @@ public: ValueTy &operator*() const { return *getValPtr(); } }; -// Specialize simplify_type to allow AssertingVH to participate in -// dyn_cast, isa, etc. -template<typename From> struct simplify_type; -template<> struct simplify_type<const AssertingVH<Value> > { - typedef Value* SimpleType; - static SimpleType getSimplifiedValue(const AssertingVH<Value> &AVH) { - return static_cast<Value *>(AVH); - } -}; -template<> struct simplify_type<AssertingVH<Value> > - : public simplify_type<const AssertingVH<Value> > {}; - // Specialize DenseMapInfo to allow AssertingVH to participate in DenseMap. template<typename T> struct DenseMapInfo<AssertingVH<T> > { @@ -345,18 +332,6 @@ public: ValueTy &operator*() const { return *getValPtr(); } }; -// Specialize simplify_type to allow TrackingVH to participate in -// dyn_cast, isa, etc. -template<typename From> struct simplify_type; -template<> struct simplify_type<const TrackingVH<Value> > { - typedef Value* SimpleType; - static SimpleType getSimplifiedValue(const TrackingVH<Value> &AVH) { - return static_cast<Value *>(AVH); - } -}; -template<> struct simplify_type<TrackingVH<Value> > - : public simplify_type<const TrackingVH<Value> > {}; - /// CallbackVH - This is a value handle that allows subclasses to define /// callbacks that run when the underlying Value has RAUW called on it or is /// destroyed. This class can be used as the key of a map, as long as the user @@ -399,18 +374,6 @@ public: virtual void allUsesReplacedWith(Value *); }; -// Specialize simplify_type to allow CallbackVH to participate in -// dyn_cast, isa, etc. -template<typename From> struct simplify_type; -template<> struct simplify_type<const CallbackVH> { - typedef Value* SimpleType; - static SimpleType getSimplifiedValue(const CallbackVH &CVH) { - return static_cast<Value *>(CVH); - } -}; -template<> struct simplify_type<CallbackVH> - : public simplify_type<const CallbackVH> {}; - } // End llvm namespace #endif diff --git a/include/llvm/Support/Watchdog.h b/include/llvm/Support/Watchdog.h new file mode 100644 index 0000000000..b58496b2fb --- /dev/null +++ b/include/llvm/Support/Watchdog.h @@ -0,0 +1,38 @@ +//===--- Watchdog.h - Watchdog timer ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the llvm::sys::Watchdog class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_WATCHDOG_H +#define LLVM_SUPPORT_WATCHDOG_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + namespace sys { + + /// This class provides an abstraction for a timeout around an operation + /// that must complete in a given amount of time. Failure to complete before + /// the timeout is an unrecoverable situation and no mechanisms to attempt + /// to handle it are provided. + class Watchdog { + public: + Watchdog(unsigned int seconds); + ~Watchdog(); + private: + // Noncopyable. + Watchdog(const Watchdog &other) LLVM_DELETED_FUNCTION; + Watchdog &operator=(const Watchdog &other) LLVM_DELETED_FUNCTION; + }; + } +} + +#endif diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index db43ccfece..906e97c91f 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -209,6 +209,26 @@ template <typename T> struct remove_pointer<T*volatile> { typedef T type; }; template <typename T> struct remove_pointer<T*const volatile> { typedef T type; }; +// If T is a pointer, just return it. If it is not, return T&. +template<typename T, typename Enable = void> +struct add_lvalue_reference_if_not_pointer { typedef T &type; }; + +template<typename T> +struct add_lvalue_reference_if_not_pointer<T, + typename enable_if<is_pointer<T> >::type> { + typedef T type; +}; + +// If T is a pointer to X, return a pointer to const X. If it is not, return +// const T. +template<typename T, typename Enable = void> +struct add_const_past_pointer { typedef const T type; }; + +template<typename T> +struct add_const_past_pointer<T, typename enable_if<is_pointer<T> >::type> { + typedef const typename remove_pointer<T>::type *type; +}; + template <bool, typename T, typename F> struct conditional { typedef T type; }; diff --git a/include/llvm/TableGen/Error.h b/include/llvm/TableGen/Error.h index 2f6b7e625c..2d0a2b45a9 100644 --- a/include/llvm/TableGen/Error.h +++ b/include/llvm/TableGen/Error.h @@ -32,6 +32,7 @@ LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const std::string &Msg); extern SourceMgr SrcMgr; +extern unsigned ErrorsPrinted; } // end namespace "llvm" diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index 3cf4f1f054..76ee69dd8d 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -1559,6 +1559,11 @@ public: /// Init *getValueInit(StringRef FieldName) const; + /// Return true if the named field is unset. + bool isValueUnset(StringRef FieldName) const { + return getValueInit(FieldName) == UnsetInit::get(); + } + /// getValueAsString - This method looks up the specified field and returns /// its value as a string, throwing an exception if the field does not exist /// or if the value is not a string. diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 876479bf8a..deee2eb6de 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -397,6 +397,9 @@ class Instruction { InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling. + // Scheduling information from TargetSchedule.td. + list<SchedReadWrite> SchedRW; + string Constraints = ""; // OperandConstraint, e.g. $src = $dst. /// DisableEncoding - List of operand names (e.g. "$op1,$op2") that should not diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h index 58bfcec07a..d5f30f40ad 100644 --- a/include/llvm/Target/TargetFrameLowering.h +++ b/include/llvm/Target/TargetFrameLowering.h @@ -194,7 +194,8 @@ public: /// finalized. Once the frame is finalized, MO_FrameIndex operands are /// replaced with direct constants. This method is optional. /// - virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const { + virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const { } /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index e3e5737300..1786bd28f3 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -694,13 +694,6 @@ public: return false; } - /// This function returns true if the target would benefit from code placement - /// optimization. - /// @brief Determine if the target should perform code placement optimization. - bool shouldOptimizeCodePlacement() const { - return BenefitFromCodePlacementOpt; - } - /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination @@ -958,6 +951,13 @@ protected: RegClassForVT[VT.SimpleTy] = RC; } + /// clearRegisterClasses - remove all register classes + void clearRegisterClasses() { + for (unsigned i = 0 ; i<array_lengthof(RegClassForVT); i++) + RegClassForVT[i] = 0; + AvailableRegClasses.clear(); + } + /// findRepresentativeClass - Return the largest legal super-reg register class /// of the register class for the specified type and its associated "cost". virtual std::pair<const TargetRegisterClass*, uint8_t> @@ -1637,10 +1637,6 @@ protected: /// to memmove, used for functions with OpSize attribute. unsigned MaxStoresPerMemmoveOptSize; - /// This field specifies whether the target can benefit from code placement - /// optimization. - bool BenefitFromCodePlacementOpt; - /// PredictableSelectIsExpensive - Tells the code generator that select is /// more expensive than a branch if the branch is usually predicted right. bool PredictableSelectIsExpensive; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 35cf20a702..66f3a3c71b 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -95,7 +95,10 @@ public: /// a reference to that target's TargetSubtargetInfo-derived member variable. virtual const TargetSubtargetInfo *getSubtargetImpl() const { return 0; } - TargetOptions Options; + mutable TargetOptions Options; + + /// \brief Reset the target options based on the function's attributes. + void resetTargetOptions(const MachineFunction *MF) const; // Interfaces to the major aspects of target machine information: // -- Instruction opcode and operand information diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td index b7920bae8a..660d2c48b6 100644 --- a/include/llvm/Target/TargetSchedule.td +++ b/include/llvm/Target/TargetSchedule.td @@ -133,6 +133,11 @@ def EponymousProcResourceKind : ProcResourceKind; class ProcResource<int num> : ProcResourceKind, ProcResourceUnits<EponymousProcResourceKind, num>; +class ProcResGroup<list<ProcResource> resources> : ProcResourceKind { + list<ProcResource> Resources = resources; + SchedMachineModel SchedModel = ?; +} + // A target architecture may define SchedReadWrite types and associate // them with instruction operands. class SchedReadWrite; diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index f96b5b3f77..4aae200dd0 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -31,20 +31,40 @@ ModulePass *createOptimalEdgeProfilerPass(); ModulePass *createPathProfilerPass(); // Insert GCOV profiling instrumentation -static const char DefaultGCovVersion[4] = {'*', '2', '0', '4'}; -ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true, - const char (&Version)[4] =DefaultGCovVersion, - bool UseExtraChecksum = false, - bool NoRedZone = false, - bool NoFunctionNamesInData = false); +struct GCOVOptions { + static GCOVOptions getDefault(); + + // Specify whether to emit .gcno files. + bool EmitNotes; + + // Specify whether to modify the program to emit .gcda files when run. + bool EmitData; + + // A four-byte version string. The meaning of a version string is described in + // gcc's gcov-io.h + char Version[4]; + + // Emit a "cfg checksum" that follows the "line number checksum" of a + // function. This affects both .gcno and .gcda files. + bool UseCfgChecksum; + + // Add the 'noredzone' attribute to added runtime library calls. + bool NoRedZone; + + // Emit the name of the function in the .gcda files. This is redundant, as + // the function identifier can be used to find the name from the .gcno file. + bool FunctionNamesInData; +}; +ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = + GCOVOptions::getDefault()); // Insert AddressSanitizer (address sanity checking) instrumentation FunctionPass *createAddressSanitizerFunctionPass( - bool CheckInitOrder = false, bool CheckUseAfterReturn = false, + bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false); ModulePass *createAddressSanitizerModulePass( - bool CheckInitOrder = false, StringRef BlacklistFile = StringRef(), + bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false); // Insert MemorySanitizer instrumentation (detection of uninitialized reads) @@ -54,7 +74,6 @@ FunctionPass *createMemorySanitizerPass(bool TrackOrigins = false, // Insert ThreadSanitizer (race detection) instrumentation FunctionPass *createThreadSanitizerPass(StringRef BlacklistFile = StringRef()); - // BoundsChecking - This pass instruments the code to perform run-time bounds // checking on loads, stores, and other memory intrinsics. FunctionPass *createBoundsCheckingPass(); diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index e89759a097..e833aaa6d6 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -342,7 +342,7 @@ extern char &InstructionSimplifierID; //===----------------------------------------------------------------------===// // -// LowerExpectIntriniscs - Removes llvm.expect intrinsics and creates +// LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index e58dde3d93..a571463dfe 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -44,6 +44,8 @@ static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden); static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden); static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden); +static cl::opt<bool> EvalTBAA("evaluate-tbaa", cl::ReallyHidden); + namespace { class AAEval : public FunctionPass { unsigned NoAlias, MayAlias, PartialAlias, MustAlias; @@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, } } +static inline void +PrintLoadStoreResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + errs() << " " << Msg << ": " << *V1 + << " <-> " << *V2 << '\n'; + } +} + static inline bool isInterestingPointer(Value *V) { return V->getType()->isPointerTy() && !isa<ConstantPointerNull>(V); @@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) { SetVector<Value *> Pointers; SetVector<CallSite> CallSites; + SetVector<Value *> Loads; + SetVector<Value *> Stores; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) // Add all pointer arguments. @@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. Pointers.insert(&*I); + if (EvalTBAA && isa<LoadInst>(&*I)) + Loads.insert(&*I); + if (EvalTBAA && isa<StoreInst>(&*I)) + Stores.insert(&*I); Instruction &Inst = *I; if (CallSite CS = cast<Value>(&Inst)) { Value *Callee = CS.getCalledValue(); @@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) { } } + if (EvalTBAA) { + // iterate over all pairs of load, store + for (SetVector<Value *>::iterator I1 = Loads.begin(), E = Loads.end(); + I1 != E; ++I1) { + for (SetVector<Value *>::iterator I2 = Stores.begin(), E2 = Stores.end(); + I2 != E2; ++I2) { + switch (AA.alias(AA.getLocation(cast<LoadInst>(*I1)), + AA.getLocation(cast<StoreInst>(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + + // iterate over all pairs of store, store + for (SetVector<Value *>::iterator I1 = Stores.begin(), E = Stores.end(); + I1 != E; ++I1) { + for (SetVector<Value *>::iterator I2 = Stores.begin(); I2 != I1; ++I2) { + switch (AA.alias(AA.getLocation(cast<StoreInst>(*I1)), + AA.getLocation(cast<StoreInst>(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + } + // Mod/ref alias analysis: compare all pairs of calls and values for (SetVector<CallSite>::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 4139336f26..ae6da1af0c 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -851,9 +851,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // pointers, figure out if the indexes to the GEP tell us anything about the // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + // Check for geps of non-aliasing underlying pointers where the offsets are // identical. - if (V1Size == V2Size) { + if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1TBAAInfo, UnderlyingV2, @@ -881,10 +885,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, GEP1VariableIndices.clear(); } } - - // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 1faa04623e..2240e9de33 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements an analysis that determines, for a given memory -// operation, what preceding memory operations it depends on. It builds on +// operation, what preceding memory operations it depends on. It builds on // alias analysis information, and tries to provide a lazy, caching interface to // a common kind of alias information query. // @@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, static const int BlockScanLimit = 500; char MemoryDependenceAnalysis::ID = 0; - + // Register this pass... INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { /// RemoveFromReverseMap - This is a helper function that removes Val from /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. template <typename KeyTy> -static void RemoveFromReverseMap(DenseMap<Instruction*, +static void RemoveFromReverseMap(DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> > &ReverseMap, Instruction *Inst, KeyTy Val) { typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator @@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (LI->isUnordered()) { Loc = AA->getLocation(LI); return AliasAnalysis::Ref; - } else if (LI->getOrdering() == Monotonic) { + } + if (LI->getOrdering() == Monotonic) { Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } @@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (SI->isUnordered()) { Loc = AA->getLocation(SI); return AliasAnalysis::Mod; - } else if (SI->getOrdering() == Monotonic) { + } + if (SI->getOrdering() == Monotonic) { Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } @@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { // Limit the amount of scanning we do so we don't end up with quadratic - // running time on extreme testcases. + // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); Instruction *Inst = --ScanIt; - + // If this inst is a memory op, get the pointer it accessed AliasAnalysis::Location Loc; AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); @@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, /// /// MemLocBase, MemLocOffset are lazily computed here the first time the /// base/offs of memloc is needed. -static bool +static bool isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, @@ -289,25 +291,25 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, if (LI->getParent()->getParent()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) return 0; - + // Get the base of this load. int64_t LIOffs = 0; - const Value *LIBase = + const Value *LIBase = GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); - + // If the two pointers are not based on the same pointer, we can't tell that // they are related. if (LIBase != MemLocBase) return 0; - + // Okay, the two values are based on the same pointer, but returned as // no-alias. This happens when we have things like two byte loads at "P+1" // and "P+3". Check to see if increasing the size of the "LI" load up to its // alignment (or the largest native integer type) will allow us to load all // the bits required by MemLoc. - + // If MemLoc is before LI, then no widening of LI will help us out. if (MemLocOffs < LIOffs) return 0; - + // Get the alignment of the load in bytes. We assume that it is safe to load // any legal integer up to this size without a problem. For example, if we're // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can @@ -316,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned LoadAlign = LI->getAlignment(); int64_t MemLocEnd = MemLocOffs+MemLocSize; - + // If no amount of rounding up will let MemLoc fit into LI, then bail out. if (LIOffs+LoadAlign < MemLocEnd) return 0; - + // This is the size of the load to try. Start with the next larger power of // two. unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - + while (1) { // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. @@ -343,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; - + NewLoadByteSize <<= 1; } } @@ -355,7 +357,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, /// instruction as well; this function may take advantage of the metadata /// annotated to the query instruction to refine the result. MemDepResult MemoryDependenceAnalysis:: -getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { @@ -382,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependences. if (isa<DbgInfoIntrinsic>(II)) continue; - + // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { @@ -406,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(LI); AliasAnalysis::Location LoadLoc = AA->getLocation(LI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - + if (isLoad) { if (R == AliasAnalysis::NoAlias) { // If this is an over-aligned integer load (for example, @@ -423,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) return MemDepResult::getClobber(Inst); - + continue; } - + // Must aliased loads are defs of each other. if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); @@ -441,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (R == AliasAnalysis::PartialAlias) return MemDepResult::getClobber(Inst); #endif - + // Random may-alias loads don't depend on each other without a // dependence. continue; @@ -458,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } - + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // Atomic stores have complications involved. // FIXME: This is overly conservative. @@ -474,10 +476,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. AliasAnalysis::Location StoreLoc = AA->getLocation(SI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); - + if (R == AliasAnalysis::NoAlias) continue; if (R == AliasAnalysis::MustAlias) @@ -498,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); // Be conservative if the accessed pointer may alias the allocation. @@ -532,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(Inst); } } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) @@ -544,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, /// depends. MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { Instruction *ScanPos = QueryInst; - + // Check for a cached result MemDepResult &LocalCache = LocalDeps[QueryInst]; - + // If the cached entry is non-dirty, just return it. Note that this depends // on MemDepResult's default constructing to 'dirty'. if (!LocalCache.isDirty()) return LocalCache; - + // Otherwise, if we have a dirty entry, we know we can start the scan at that // instruction, which may save us some work. if (Instruction *Inst = LocalCache.getInst()) { ScanPos = Inst; - + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); } - + BasicBlock *QueryParent = QueryInst->getParent(); - + // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { // No dependence found. If this is the entry block of the function, it is @@ -591,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); } - + // Remember the result! if (Instruction *I = LocalCache.getInst()) ReverseLocalDeps[I].insert(QueryInst); - + return LocalCache; } @@ -636,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// the uncached case, this starts out as the set of predecessors we care /// about. SmallVector<BasicBlock*, 32> DirtyBlocks; - + if (!Cache.empty()) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. @@ -644,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { ++NumCacheNonLocal; return Cache; } - + // If we already have a partially computed set of results, scan them to // determine what is dirty, seeding our initial DirtyBlocks worklist. for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) if (I->getResult().isDirty()) DirtyBlocks.push_back(I->getBB()); - + // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); - + ++NumCacheDirtyNonLocal; //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " // << Cache.size() << " cached: " << *QueryInst; @@ -665,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.push_back(*PI); ++NumUncacheNonLocal; } - + // isReadonlyCall - If this is a read-only call, we can be more aggressive. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); SmallPtrSet<BasicBlock*, 64> Visited; - + unsigned NumSortedEntries = Cache.size(); DEBUG(AssertSorted(Cache)); - + // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { BasicBlock *DirtyBB = DirtyBlocks.back(); DirtyBlocks.pop_back(); - + // Already processed this block? if (!Visited.insert(DirtyBB)) continue; - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. DEBUG(AssertSorted(Cache, NumSortedEntries)); - NonLocalDepInfo::iterator Entry = + NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, NonLocalDepEntry(DirtyBB)); if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; - if (Entry != Cache.begin()+NumSortedEntries && + if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. if (!Entry->getResult().isDirty()) continue; - + // Otherwise, remember this slot so we can update the value. ExistingResult = &*Entry; } - + // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); @@ -715,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { QueryCS.getInstruction()); } } - + // Find out if this block has a local dependency for QueryInst. MemDepResult Dep; - + if (ScanPos != DirtyBB->begin()) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { @@ -728,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { } else { Dep = MemDepResult::getNonFuncLocal(); } - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! if (!Dep.isNonLocal()) { @@ -744,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Instruction *Inst = Dep.getInst()) ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); } else { - + // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) DirtyBlocks.push_back(*PI); } } - + return Cache; } @@ -769,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD); - + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI @@ -794,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis:: GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = @@ -802,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, NonLocalDepEntry(BB)); if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; - + // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; return ExistingResult->getResult(); - } - + } + // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. @@ -823,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; ScanPos = ExistingResult->getResult().getInst(); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } - + // Scan the block for the dependency. MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache->push_back(NonLocalDepEntry(BB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! if (!Dep.isDef() && !Dep.isClobber()) return Dep; - + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently // update MemDep when we remove instructions. Instruction *Inst = Dep.getInst(); @@ -859,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain /// number of elements in the array that are already properly ordered. This is /// optimized for the case when only a few entries are added. -static void +static void SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { switch (Cache.size() - NumSortedEntries) { @@ -911,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl<NonLocalDepResult> &Result, DenseMap<BasicBlock*, Value*> &Visited, bool SkipFirstBlock) { - + // Look up the cached info for Pointer. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -925,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. - std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = + std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair = NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); NonLocalPointerInfo *CacheInfo = &Pair.first->second; @@ -987,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB()); if (VI == Visited.end() || VI->second == Pointer.getAddr()) continue; - + // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a // non-fully cached query, but there is little point in doing this. return true; } } - + Value *Addr = Pointer.getAddr(); for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { @@ -1005,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, ++NumCacheCompleteNonLocalPtr; return false; } - + // Otherwise, either this is a new block, a block with an invalid cache // pointer or one that we're about to invalidate by putting more info into it // than its valid cache info. If empty, the result will be valid cache info, @@ -1014,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); else CacheInfo->Pair = BBSkipFirstBlockPair(); - + SmallVector<BasicBlock*, 32> Worklist; Worklist.push_back(StartBB); - + // PredList used inside loop. SmallVector<std::pair<BasicBlock*, PHITransAddr>, 16> PredList; @@ -1028,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); DEBUG(AssertSorted(*Cache)); - + while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - + // Skip the first block if we have it. if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. See if we already have @@ -1043,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DEBUG(AssertSorted(*Cache, NumSortedEntries)); MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, NumSortedEntries); - + // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } } - + // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. // If not, we just add the predecessors to the worklist and scan them with @@ -1067,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NewBlocks.push_back(*PI); continue; } - + // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. @@ -1082,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } - + // We do need to do phi translation, if we know ahead of time we can't phi // translate this value, don't even try. if (!Pointer.IsPotentiallyPHITranslatable()) goto PredTranslationFailure; - + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. // Sort it now (if needed) so that recursive invocations of @@ -1110,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); - + // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur // with PHI translation when a critical edge exists and the PHI node in @@ -1127,14 +1129,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; - + // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. - for (unsigned i = 0; i < PredList.size(); i++) + for (unsigned i = 0, n = PredList.size(); i < n; ++i) Visited.erase(PredList[i].first); goto PredTranslationFailure; @@ -1143,10 +1145,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Actually process results here; this need to be a separate loop to avoid // calling getNonLocalPointerDepFromBB for blocks we don't want to return - // any results for. (getNonLocalPointerDepFromBB will modify our + // any results for. (getNonLocalPointerDepFromBB will modify our // datastructures in ways the code after the PredTranslationFailure label // doesn't expect.) - for (unsigned i = 0; i < PredList.size(); i++) { + for (unsigned i = 0, n = PredList.size(); i < n; ++i) { BasicBlock *Pred = PredList[i].first; PHITransAddr &PredPointer = PredList[i].second; Value *PredPtrVal = PredPointer.getAddr(); @@ -1186,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; } } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -1204,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // The following code is "failure"; we can't produce a sane translation // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - + if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); } - + // Since we failed phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole @@ -1225,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // we have to bail out. if (SkipFirstBlock) return true; - + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); if (I->getBB() != BB) continue; - + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); I->setResult(MemDepResult::getUnknown()); @@ -1250,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, /// CachedNonLocalPointerInfo, remove it. void MemoryDependenceAnalysis:: RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { - CachedNonLocalPointerInfo::iterator It = + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; - + // Remove all of the entries in the BB->val map. This involves removing // instructions from the reverse map. NonLocalDepInfo &PInfo = It->second.NonLocalDeps; - + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); } - + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). NonLocalPointerDeps.erase(It); } @@ -1321,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove this local dependency info. LocalDeps.erase(LocalDepEntry); } - + // If we have any cached pointer dependencies on this instruction, remove // them. If the instruction has non-pointer type, then it can't be a pointer // base. - + // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } - + // Loop over all of the things that depend on the instruction we're removing. - // + // SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd; // If we find RemInst as a clobber or Def in any of the maps for other values, @@ -1346,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); - + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second; // RemInst can't be the terminator if it has local stuff depending on it. assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) && "Nothing can locally depend on a terminator"); - + for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(), E = ReverseDeps.end(); I != E; ++I) { Instruction *InstDependingOnRemInst = *I; assert(InstDependingOnRemInst != RemInst && "Already removed our local dep info"); - + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; - + // Make sure to remember that new things depend on NewDepInst. assert(NewDirtyVal.getInst() && "There is no way something else can have " "a local dep on this if it is a terminator!"); - ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst)); } - + ReverseLocalDeps.erase(ReverseDepIt); // Add new reverse deps after scanning the set, to avoid invalidating the @@ -1379,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); if (ReverseDepIt != ReverseNonLocalDeps.end()) { SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second; for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); - + PerInstNLInfo &INLD = NonLocalDeps[*I]; // The information is now dirty! INLD.second = true; - - for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); } @@ -1412,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a // value in the NonLocalPointerDeps info. ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = @@ -1420,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { SmallPtrSet<ValueIsLoadPair, 4> &Set = ReversePtrDepIt->second; SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd; - + for (SmallPtrSet<ValueIsLoadPair, 4>::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { ValueIsLoadPair P = *I; assert(P.getPointer() != RemInst && "Already removed NonLocalPointerDeps info for RemInst"); - + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; - + // The cache is not valid for any specific block anymore. NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); - + // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); } - + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its // subsequent value may invalidate the sortedness. std::sort(NLPDI.begin(), NLPDI.end()); } - + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); - + while (!ReversePtrDepsToAdd.empty()) { ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] .insert(ReversePtrDepsToAdd.back().second); ReversePtrDepsToAdd.pop_back(); } } - - + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); @@ -1472,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { assert(I->second.getInst() != D && "Inst occurs in data structures"); } - + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), E = NonLocalPointerDeps.end(); I != E; ++I) { assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); @@ -1481,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { II != E; ++II) assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } - + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), E = NonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1490,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = INLD.first.end(); II != EE; ++II) assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), E = ReverseLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1498,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), E = ReverseNonLocalDeps.end(); I != E; ++I) { @@ -1507,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseNonLocalPtrDepTy::const_iterator I = ReverseNonLocalPtrDeps.begin(), E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in rev NLPD map"); - + for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(), E = I->second.end(); II != E; ++II) assert(*II != ValueIsLoadPair(D, false) && *II != ValueIsLoadPair(D, true) && "Inst occurs in ReverseNonLocalPtrDeps map"); } - + } diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index 745d8c60bb..48d7d05d78 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -84,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - arrayMap[0][F->begin()][0] = i++; + arrayMap[(BasicBlock*)0][F->begin()][0] = i++; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -125,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { << currentPath->getCount() << "\n"); // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) - edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] + edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); for( ProfilePathEdgeIterator nextEdge = pev->begin(), diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 2daa7d4f6b..9626a48b9d 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT<Function,BasicBlock>:: succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); if (Succ == End) { - P[0] = BB; + P[(const BasicBlock*)0] = BB; if (Mode & GetPathToExit) { hasFoundPath = true; BB = 0; @@ -752,10 +752,10 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Succ != End; ++Succ) { Path P; GetPath(*Succ, 0, P, GetPathToExit); - if (Dest && Dest != P[0]) { + if (Dest && Dest != P[(const BasicBlock*)0]) { AllEdgesHaveSameReturn = false; } - Dest = P[0]; + Dest = P[(const BasicBlock*)0]; } if (AllEdgesHaveSameReturn) { if(EstimateMissingEdges(BB)) { @@ -927,7 +927,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) { Path P; const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[0]; + Dest = P[(const BasicBlock*)0]; if (!Dest) continue; if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 07d83296bc..6ea915fdb0 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4230,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { return Max ? Max : SE->getCouldNotCompute(); } +bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, + ScalarEvolution *SE) const { + if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + return true; + + if (!ExitNotTaken.ExitingBlock) + return false; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExactNotTaken != SE->getCouldNotCompute() + && SE->hasOperand(ENT->ExactNotTaken, S)) { + return true; + } + } + return false; +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -6940,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); + + for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + BackedgeTakenCounts.erase(I++); + } + else + ++I; + } } typedef DenseMap<const Loop *, std::string> VerifyMap; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 72421a00c7..976cd87321 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -263,8 +263,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { case Instruction::PtrToInt: // A ptrtoint cast is free so long as the result is large enough to store // the pointer, and a legal integer type. - if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && - OpTy->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && + Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) return TCC_Free; // Otherwise it's not a no-op. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 45b75df508..45dcc5e37e 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -953,6 +953,8 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { // Check for pointer simplifications. if (V->getType()->isPointerTy()) { + if (isKnownNonNull(V)) + return true; if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) if (isGEPKnownNonNull(GEP, TD, Depth)) return true; diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 942346b44e..9dafe2a036 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -292,7 +292,7 @@ void BitstreamCursor::ReadAbbrevRecord() { Abbv->Add(BitCodeAbbrevOp(0)); continue; } - + Abbv->Add(BitCodeAbbrevOp(E, Data)); } else Abbv->Add(BitCodeAbbrevOp(E)); diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 4f19dd00e6..8bac6da892 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -60,7 +60,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { I != E; ++I) EnumerateValue(I->getAliasee()); - // Insert constants and metadata that are named at module level into the slot + // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... EnumerateValueSymbolTable(M->getValueSymbolTable()); EnumerateNamedMetadata(M); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index d1ea027338..76ebe9aca9 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,6 +1,7 @@ # `Support' and `TableGen' libraries are added on the top-level CMakeLists.txt add_subdirectory(IR) +add_subdirectory(IRReader) add_subdirectory(CodeGen) add_subdirectory(Bitcode) add_subdirectory(Transforms) diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 58fe2ed9d3..8d15c069c6 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp + ErlangGCPrinter.cpp OcamlGCPrinter.cpp Win64Exception.cpp ) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 4ded2818ed..57e0acda89 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -112,6 +112,17 @@ DIE::~DIE() { delete Children[i]; } +/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. +DIE *DIE::getCompileUnit() const{ + DIE *p = getParent(); + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit) + return p; + p = p->getParent(); + } + llvm_unreachable("We should not have orphaned DIEs."); +} + #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; @@ -133,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) { O << "Size: " << Size << "\n"; } - const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData(); + const SmallVectorImpl<DIEAbbrevData> &Data = Abbrev.getData(); IndentCount += 2; for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -313,7 +324,7 @@ void DIEEntry::print(raw_ostream &O) { /// unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { if (!Size) { - const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); } @@ -332,7 +343,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; } - const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 35d7959ac1..18b6966e18 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -66,7 +66,7 @@ namespace llvm { /// Data - Raw data bytes for abbreviation. /// - SmallVector<DIEAbbrevData, 8> Data; + SmallVector<DIEAbbrevData, 12> Data; public: DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} @@ -75,7 +75,7 @@ namespace llvm { uint16_t getTag() const { return Tag; } unsigned getNumber() const { return Number; } uint16_t getChildrenFlag() const { return ChildrenFlag; } - const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; } + const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; } void setTag(uint16_t T) { Tag = T; } void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } @@ -133,7 +133,7 @@ namespace llvm { /// Attribute values. /// - SmallVector<DIEValue*, 32> Values; + SmallVector<DIEValue*, 12> Values; // Private data for print() mutable unsigned IndentCount; @@ -150,8 +150,11 @@ namespace llvm { unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } const std::vector<DIE *> &getChildren() const { return Children; } - const SmallVector<DIEValue*, 32> &getValues() const { return Values; } + const SmallVectorImpl<DIEValue*> &getValues() const { return Values; } DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile unit DIE this DIE belongs + /// to. + DIE *getCompileUnit() const; void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } @@ -232,9 +235,10 @@ namespace llvm { /// static unsigned BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { - if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; - if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; - if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; } else { if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 01f15e52e8..1c743c2414 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -35,7 +35,7 @@ using namespace llvm; CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0) { + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 77bf6a9e50..2b180c6cc3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -87,6 +87,9 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; + /// getLowerBoundDefault - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t getDefaultLowerBound() const; @@ -103,6 +106,7 @@ public: unsigned getUniqueID() const { return UniqueID; } unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } @@ -120,6 +124,7 @@ public: return AccelTypes; } + void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 30bfa78989..585a92a92f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -352,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); + if (AbsSPDIE) { + bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); + // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of + // DW_FORM_ref4. SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + AbsSPDIE); SPCU->addDie(SPDie); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); @@ -716,13 +721,6 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { if (!FirstCU) FirstCU = NewCU; - if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); - // Now construct the skeleton CU associated. - constructSkeletonCU(N); - } - InfoHolder.addUnit(NewCU); CUMap.insert(std::make_pair(N, NewCU)); @@ -789,6 +787,14 @@ void DwarfDebug::beginModule() { DIArray RetainedTypes = CUNode.getRetainedTypes(); for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + // This should be a unique identifier when we want to build .dwp files. + CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + // Now construct the skeleton CU associated. + constructSkeletonCU(CUNode); + } } // Tell MMI that we have debug info. @@ -1666,8 +1672,8 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Start the size with the size of abbreviation code. Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); - const SmallVector<DIEValue*, 32> &Values = Die->getValues(); - const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData(); + const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); // Size the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -1692,15 +1698,19 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Compute the size and offset of all the DIEs. void DwarfUnits::computeSizeAndOffsets() { - for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(), + // Offset from the beginning of debug info section. + unsigned AccuOffset = 0; + for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { + (*I)->setDebugInfoOffset(AccuOffset); unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) - computeSizeAndOffset((*I)->getCUDie(), Offset); + unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); + AccuOffset += EndOffset; } } @@ -1757,8 +1767,8 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { dwarf::TagString(Abbrev->getTag())); Asm->EmitULEB128(AbbrevNumber); - const SmallVector<DIEValue*, 32> &Values = Die->getValues(); - const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData(); + const SmallVectorImpl<DIEValue*> &Values = Die->getValues(); + const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev->getData(); // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { @@ -1774,6 +1784,13 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector<DIEAbbrev *> *Abbrevs) { DIEEntry *E = cast<DIEEntry>(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); + if (Form == dwarf::DW_FORM_ref_addr) { + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Origin->getOffset() returns the offset from start of the + // compile unit. + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Addr += Holder.getCUOffset(Origin->getCompileUnit()); + } Asm->EmitInt32(Addr); break; } @@ -1839,7 +1856,7 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, const MCSection *ASection, const MCSymbol *ASectionSym) { Asm->OutStreamer.SwitchSection(USection); - for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(), + for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { CompileUnit *TheCU = *I; DIE *Die = TheCU->getCUDie(); @@ -1871,6 +1888,19 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, } } +/// For a given compile unit DIE, returns offset from beginning of debug info. +unsigned DwarfUnits::getCUOffset(DIE *Die) { + assert(Die->getTag() == dwarf::DW_TAG_compile_unit && + "Input DIE should be compile unit in getCUOffset."); + for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; + if (TheCU->getCUDie() == Die) + return TheCU->getDebugInfoOffset(); + } + llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); +} + // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2255,7 +2285,7 @@ void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; - for (SmallVector<DotDebugLocEntry, 4>::iterator + for (SmallVectorImpl<DotDebugLocEntry>::iterator I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I) { DotDebugLocEntry &Entry = *I; @@ -2269,7 +2299,7 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getDataLayout().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; - for (SmallVector<DotDebugLocEntry, 4>::iterator + for (SmallVectorImpl<DotDebugLocEntry>::iterator I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I, ++index) { DotDebugLocEntry &Entry = *I; @@ -2362,7 +2392,7 @@ void DwarfDebug::emitDebugRanges() { Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); - for (SmallVector<const MCSymbol *, 8>::iterator + for (SmallVectorImpl<const MCSymbol *>::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { if (*I) @@ -2420,13 +2450,13 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(), + for (SmallVectorImpl<const MDNode *>::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { const MDNode *Node = *I; DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node); - SmallVector<InlineInfoLabels, 4> &Labels = II->second; + SmallVectorImpl<InlineInfoLabels> &Labels = II->second; DISubprogram SP(Node); StringRef LName = SP.getLinkageName(); StringRef Name = SP.getName(); @@ -2445,7 +2475,7 @@ void DwarfDebug::emitDebugInlineInfo() { DwarfStrSectionSym); Asm->EmitULEB128(Labels.size(), "Inline count"); - for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(), + for (SmallVectorImpl<InlineInfoLabels>::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(LI->second->getOffset()); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 771bc362cb..81e345e628 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -274,6 +274,10 @@ public: /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } + + /// \brief for a given compile unit DIE, returns offset from beginning of + /// debug info. + unsigned getCUOffset(DIE *Die); }; /// \brief Collects and handles dwarf debug information. diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp new file mode 100644 index 0000000000..a8fb66dcf1 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -0,0 +1,120 @@ +//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the compiler plugin that is used in order to emit +// garbage collection information in a convenient layout for parsing and +// loading in the Erlang/OTP runtime. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGCPrinter : public GCMetadataPrinter { + public: + void beginAssembly(AsmPrinter &AP); + void finishAssembly(AsmPrinter &AP); + }; + +} + +static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGCPrinter() { } + +void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { } + +void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { + MCStreamer &OS = AP.OutStreamer; + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + + // Put this in a custom .note section. + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() + .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, + SectionKind::getDataRel())); + + // For each function... + for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { + GCFunctionInfo &MD = **FI; + + /** A compact GC layout. Emit this data structure: + * + * struct { + * int16_t PointCount; + * void *SafePointAddress[PointCount]; + * int16_t StackFrameSize; (in words) + * int16_t StackArity; + * int16_t LiveCount; + * int16_t LiveOffsets[LiveCount]; + * } __gcmap_<FUNCTIONNAME>; + **/ + + // Align to address width. + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + // Emit PointCount. + OS.AddComment("safe point count"); + AP.EmitInt16(MD.size()); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; + ++PI) { + // Emit the address of the safe point. + OS.AddComment("safe point address"); + MCSymbol *Label = PI->Label; + AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + } + + // Stack information never change in safe points! Only print info from the + // first call-site. + GCFunctionInfo::iterator PI = MD.begin(); + + // Emit the stack frame size. + OS.AddComment("stack frame size (in words)"); + AP.EmitInt16(MD.getFrameSize() / IntPtrSize); + + // Emit stack arity, i.e. the number of stacked arguments. + unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? + MD.getFunction().arg_size() - RegisteredArgs : 0; + OS.AddComment("stack arity"); + AP.EmitInt16(StackArity); + + // Emit the number of live roots in the function. + OS.AddComment("live root count"); + AP.EmitInt16(MD.live_size(PI)); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Emit live root's offset within the stack frame. + OS.AddComment("stack index (offset / wordsize)"); + AP.EmitInt16(LI->StackOffset / IntPtrSize); + } + } +} diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index ddc7adab49..56aa3309d3 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -7,13 +7,13 @@ add_llvm_library(LLVMCodeGen CalcSpillWeights.cpp CallingConvLower.cpp CodeGen.cpp - CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DFAPacketizer.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp + ErlangGC.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a33b672044..35ec68d00c 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); - initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp deleted file mode 100644 index 24518443a7..0000000000 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ /dev/null @@ -1,423 +0,0 @@ -//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the pass that optimizes code placement and aligns loop -// headers to target-specific alignment boundaries. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "code-placement" -#include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -STATISTIC(NumLoopsAligned, "Number of loops aligned"); -STATISTIC(NumIntraElim, "Number of intra loop branches eliminated"); -STATISTIC(NumIntraMoved, "Number of intra loop branches moved"); - -namespace { - class CodePlacementOpt : public MachineFunctionPass { - const MachineLoopInfo *MLI; - const TargetInstrInfo *TII; - const TargetLowering *TLI; - - public: - static char ID; - CodePlacementOpt() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<MachineLoopInfo>(); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - bool HasFallthrough(MachineBasicBlock *MBB); - bool HasAnalyzableTerminator(MachineBasicBlock *MBB); - void Splice(MachineFunction &MF, - MachineFunction::iterator InsertPt, - MachineFunction::iterator Begin, - MachineFunction::iterator End); - bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, - MachineLoop *L); - bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, - MachineLoop *L); - bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L); - bool OptimizeIntraLoopEdges(MachineFunction &MF); - bool AlignLoops(MachineFunction &MF); - bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align); - }; - - char CodePlacementOpt::ID = 0; -} // end anonymous namespace - -char &llvm::CodePlacementOptID = CodePlacementOpt::ID; -INITIALIZE_PASS(CodePlacementOpt, "code-placement", - "Code Placement Optimizer", false, false) - -/// HasFallthrough - Test whether the given branch has a fallthrough, either as -/// a plain fallthrough or as a fallthrough case of a conditional branch. -/// -bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - return false; - // This conditional branch has no fallthrough. - if (FBB) - return false; - // An unconditional branch has no fallthrough. - if (Cond.empty() && TBB) - return false; - // It has a fallthrough. - return true; -} - -/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB. -/// This is called before major changes are begun to test whether it will be -/// possible to complete the changes. -/// -/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed -/// whenever possible. -/// -bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { - // Conservatively ignore EH landing pads. - if (MBB->isLandingPad()) return false; - - // Aggressively handle return blocks and similar constructs. - if (MBB->succ_empty()) return true; - - // Ask the target's AnalyzeBranch if it can handle this block. - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - // Make sure the terminator is understood. - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - return false; - // Ignore blocks which look like they might have EH-related control flow. - // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't - // recognize the possibility of a control transfer through an unwind. - // Such blocks contain EH_LABEL instructions, however they may be in the - // middle of the block. Instead of searching for them, just check to see - // if the CFG disagrees with AnalyzeBranch. - if (1u + !Cond.empty() != MBB->succ_size()) - return false; - // Make sure we have the option of reversing the condition. - if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) - return false; - return true; -} - -/// Splice - Move the sequence of instructions [Begin,End) to just before -/// InsertPt. Update branch instructions as needed to account for broken -/// fallthrough edges and to take advantage of newly exposed fallthrough -/// opportunities. -/// -void CodePlacementOpt::Splice(MachineFunction &MF, - MachineFunction::iterator InsertPt, - MachineFunction::iterator Begin, - MachineFunction::iterator End) { - assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && - "Splice can't change the entry block!"); - MachineFunction::iterator OldBeginPrior = prior(Begin); - MachineFunction::iterator OldEndPrior = prior(End); - - MF.splice(InsertPt, Begin, End); - - prior(Begin)->updateTerminator(); - OldBeginPrior->updateTerminator(); - OldEndPrior->updateTerminator(); -} - -/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump -/// to the loop top to the top of the loop so that they have a fall through. -/// This can introduce a branch on entry to the loop, but it can eliminate a -/// branch within the loop. See the @simple case in -/// test/CodeGen/X86/loop_blocks.ll for an example of this. -bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - MachineBasicBlock *TopMBB = L->getTopBlock(); - - bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); - - if (TopMBB == MF.begin() || - HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { - new_top: - for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), - PE = TopMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; - if (Pred == TopMBB) continue; - if (HasFallthrough(Pred)) continue; - if (!L->contains(Pred)) continue; - - // Verify that we can analyze all the loop entry edges before beginning - // any changes which will require us to be able to analyze them. - if (Pred == MF.begin()) - continue; - if (!HasAnalyzableTerminator(Pred)) - continue; - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) - continue; - - // Move the block. - DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() - << " to top of loop.\n"); - Changed = true; - - // Move it and all the blocks that can reach it via fallthrough edges - // exclusively, to keep existing fallthrough edges intact. - MachineFunction::iterator Begin = Pred; - MachineFunction::iterator End = llvm::next(Begin); - while (Begin != MF.begin()) { - MachineFunction::iterator Prior = prior(Begin); - if (Prior == MF.begin()) - break; - // Stop when a non-fallthrough edge is found. - if (!HasFallthrough(Prior)) - break; - // Stop if a block which could fall-through out of the loop is found. - if (Prior->isSuccessor(End)) - break; - // If we've reached the top, stop scanning. - if (Prior == MachineFunction::iterator(TopMBB)) { - // We know top currently has a fall through (because we just checked - // it) which would be lost if we do the transformation, so it isn't - // worthwhile to do the transformation unless it would expose a new - // fallthrough edge. - if (!Prior->isSuccessor(End)) - goto next_pred; - // Otherwise we can stop scanning and proceed to move the blocks. - break; - } - // If we hit a switch or something complicated, don't move anything - // for this predecessor. - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior)))) - break; - // Ok, the block prior to Begin will be moved along with the rest. - // Extend the range to include it. - Begin = Prior; - ++NumIntraMoved; - } - - // Move the blocks. - Splice(MF, TopMBB, Begin, End); - - // Update TopMBB. - TopMBB = L->getTopBlock(); - - // We have a new loop top. Iterate on it. We shouldn't have to do this - // too many times if BranchFolding has done a reasonable job. - goto new_top; - next_pred:; - } - } - - // If the loop previously didn't exit with a fall-through and it now does, - // we eliminated a branch. - if (Changed && - !BotHasFallthrough && - HasFallthrough(L->getBottomBlock())) { - ++NumIntraElim; - } - - return Changed; -} - -/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the -/// portion of the loop contiguous with the header. This usually makes the loop -/// contiguous, provided that AnalyzeBranch can handle all the relevant -/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll -/// for an example of this. -bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - MachineBasicBlock *TopMBB = L->getTopBlock(); - MachineBasicBlock *BotMBB = L->getBottomBlock(); - - // Determine a position to move orphaned loop blocks to. If TopMBB is not - // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid losing that fallthrough. Otherwise append - // them to the bottom, even if it previously had a fallthrough, on the theory - // that it's worth an extra branch to keep the loop contiguous. - MachineFunction::iterator InsertPt = - llvm::next(MachineFunction::iterator(BotMBB)); - bool InsertAtTop = false; - if (TopMBB != MF.begin() && - !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && - HasFallthrough(BotMBB)) { - InsertPt = TopMBB; - InsertAtTop = true; - } - - // Keep a record of which blocks are in the portion of the loop contiguous - // with the loop header. - SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks; - for (MachineFunction::iterator I = TopMBB, - E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I) - ContiguousBlocks.insert(I); - - // Find non-contigous blocks and fix them. - if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt))) - for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - MachineBasicBlock *BB = *BI; - - // Verify that we can analyze all the loop entry edges before beginning - // any changes which will require us to be able to analyze them. - if (!HasAnalyzableTerminator(BB)) - continue; - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB)))) - continue; - - // If the layout predecessor is part of the loop, this block will be - // processed along with it. This keeps them in their relative order. - if (BB != MF.begin() && - L->contains(prior(MachineFunction::iterator(BB)))) - continue; - - // Check to see if this block is already contiguous with the main - // portion of the loop. - if (!ContiguousBlocks.insert(BB)) - continue; - - // Move the block. - DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() - << " to be contiguous with loop.\n"); - Changed = true; - - // Process this block and all loop blocks contiguous with it, to keep - // them in their relative order. - MachineFunction::iterator Begin = BB; - MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB)); - for (; End != MF.end(); ++End) { - if (!L->contains(End)) break; - if (!HasAnalyzableTerminator(End)) break; - ContiguousBlocks.insert(End); - ++NumIntraMoved; - } - - // If we're inserting at the bottom of the loop, and the code we're - // moving originally had fall-through successors, bring the sucessors - // up with the loop blocks to preserve the fall-through edges. - if (!InsertAtTop) - for (; End != MF.end(); ++End) { - if (L->contains(End)) break; - if (!HasAnalyzableTerminator(End)) break; - if (!HasFallthrough(prior(End))) break; - } - - // Move the blocks. This may invalidate TopMBB and/or BotMBB, but - // we don't need them anymore at this point. - Splice(MF, InsertPt, Begin, End); - } - - return Changed; -} - -/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize -/// intra-loop branching and to form contiguous loops. -/// -/// This code takes the approach of making minor changes to the existing -/// layout to fix specific loop-oriented problems. Also, it depends on -/// AnalyzeBranch, which can't understand complex control instructions. -/// -bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - - // Do optimization for nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); - - // Do optimization for this loop. - Changed |= EliminateUnconditionalJumpsToTop(MF, L); - Changed |= MoveDiscontiguousLoopBlocks(MF, L); - - return Changed; -} - -/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize -/// intra-loop branching and to form contiguous loops. -/// -bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { - bool Changed = false; - - if (!TLI->shouldOptimizeCodePlacement()) - return Changed; - - // Do optimization for each loop in the function. - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) - if (!(*I)->getParentLoop()) - Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); - - return Changed; -} - -/// AlignLoops - Align loop headers to target preferred alignments. -/// -bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { - const Function *F = MF.getFunction(); - if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) - return false; - - unsigned Align = TLI->getPrefLoopAlignment(); - if (!Align) - return false; // Don't care about loop alignment. - - bool Changed = false; - - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) - Changed |= AlignLoop(MF, *I, Align); - - return Changed; -} - -/// AlignLoop - Align loop headers to target preferred alignments. -/// -bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, - unsigned Align) { - bool Changed = false; - - // Do alignment for nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= AlignLoop(MF, *I, Align); - - L->getTopBlock()->setAlignment(Align); - Changed = true; - ++NumLoopsAligned; - - return Changed; -} - -bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) { - MLI = &getAnalysis<MachineLoopInfo>(); - if (MLI->empty()) - return false; // No loops. - - TLI = MF.getTarget().getTargetLowering(); - TII = MF.getTarget().getInstrInfo(); - - bool Changed = OptimizeIntraLoopEdges(MF); - - Changed |= AlignLoops(MF); - - return Changed; -} diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp new file mode 100644 index 0000000000..8a1e2d9c99 --- /dev/null +++ b/lib/CodeGen/ErlangGC.cpp @@ -0,0 +1,81 @@ +//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Erlang/OTP runtime-compatible garbage collector +// (e.g. defines safe points, root initialization etc.) +// +// The frametable emitter is in ErlangGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGC : public GCStrategy { + MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const; + public: + ErlangGC(); + bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF); + }; + +} + +static GCRegistry::Add<ErlangGC> +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGC() { } + +ErlangGC::ErlangGC() { + InitRoots = false; + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; + CustomRoots = false; + CustomSafePoints = true; +} + +MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + + if (MI->getDesc().isCall()) { + + // Do not treat tail call sites as safe points. + if (MI->getDesc().isTerminator()) + continue; + + /* Code copied from VisitCallPoint(...) */ + MachineBasicBlock::iterator RAI = MI; ++RAI; + MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc()); + FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc()); + } + + return false; +} diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 3b28e6afb6..7793e96c35 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { /// OrigIdx are also available with the same value at UseIdx. bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx) { + SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 3b09c6b779..cd948e24a6 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1061,7 +1061,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // Align this block if the layout predecessor's edge into this block is - // cold relative to the block. When this is true, othe predecessors make up + // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 5e04f2d8a3..04321f3292 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -574,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index a777f52cb2..1af00e84a6 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); VRegInfo[Reg].first = RC; } diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 103b058c13..c872355e37 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -2182,7 +2182,7 @@ public: /// Callback to select the highest priority node from the ready Q. virtual SUnit *pickNode(bool &IsTopNode) { if (ReadyQ.empty()) return NULL; - pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 6e1cad3252..1af65c88ab 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -39,12 +39,9 @@ static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); static cl::opt<bool> DisableBlockPlacement("disable-block-placement", - cl::Hidden, cl::desc("Disable the probability-driven block placement, and " - "re-enable the old code placement pass")); + cl::Hidden, cl::desc("Disable probability-driven block placement")); static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", cl::Hidden, cl::desc("Collect probability-driven block placement stats")); -static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden, - cl::desc("Disable code placement")); static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, @@ -149,10 +146,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) - return applyDisable(TargetID, DisableCodePlace); - - if (StandardID == &CodePlacementOptID) - return applyDisable(TargetID, DisableCodePlace); + return applyDisable(TargetID, DisableBlockPlacement); if (StandardID == &StackSlotColoringID) return applyDisable(TargetID, DisableSSC); @@ -742,16 +736,7 @@ bool TargetPassConfig::addGCPasses() { /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - AnalysisID PassID = 0; - if (!DisableBlockPlacement) { - // MachineBlockPlacement is a new pass which subsumes the functionality of - // CodPlacementOpt. The old code placement pass can be restored by - // disabling block placement, but eventually it will be removed. - PassID = addPass(&MachineBlockPlacementID); - } else { - PassID = addPass(&CodePlacementOptID); - } - if (PassID) { + if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 32830f57bc..5a168dd244 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -55,7 +55,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog", "Prologue/Epilogue Insertion & Frame Finalization", false, false) -STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); @@ -101,7 +100,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TFI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); @@ -548,9 +547,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector<int, 2>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } // FIXME: Once this is working, then enable flag will change to a target @@ -593,7 +594,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; @@ -615,7 +616,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; @@ -631,9 +632,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // stack pointer. if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || !RegInfo->useFPForScavengingIndex(Fn))) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector<int, 2>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -816,14 +819,20 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); - unsigned VirtReg = 0; - unsigned ScratchReg = 0; int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; + MachineBasicBlock::iterator J = llvm::next(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS->forward(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); @@ -833,29 +842,37 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - ++NumVirtualFrameRegs; - - // Have we already allocated a scratch register for this virtual? - if (Reg != VirtReg) { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // Scavenge a new scratch register - VirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - ScratchReg = RS->scavengeRegister(RC, I, SPAdj); - ++NumScavengedRegs; - } + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MI->getOperand(i).setReg(ScratchReg); + Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. + RS->setUsed(ScratchReg); } } - RS->forward(I); - ++I; + + // If the scavenger needed to use one of its spill slots, the + // spill code will have been inserted in between I and J. This is a + // problem because we need the spill code before I: Move I to just + // prior to J. + if (I != llvm::prior(J)) { + BB->splice(J, BB, I++); + RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I)); + } else + ++I; } } } diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 6da901f81d..55a66ba548 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -45,9 +45,11 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const { } void RegScavenger::initRegState() { - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; + for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = NULL; + } // All registers started out unused. RegsAvailable.set(); @@ -121,10 +123,13 @@ void RegScavenger::forward() { MachineInstr *MI = MBBI; - if (MI == ScavengeRestore) { - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; + for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + if (I->Restore != MI) + continue; + + I->Reg = 0; + I->Restore = NULL; } if (MI->isDebugValue()) @@ -145,7 +150,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || isReserved(Reg)) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -170,7 +175,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || isReserved(Reg)) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -360,37 +365,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } - assert(ScavengedReg == 0 && - "Scavenger slot is live, unable to scavenge another register!"); + // Find an available scavenging slot. + unsigned SI; + for (SI = 0; SI < Scavenged.size(); ++SI) + if (Scavenged[SI].Reg == 0) + break; + + if (SI == Scavenged.size()) { + // We need to scavenge a register but have no spill slot, the target + // must know how to do it (if not, we'll assert below). + Scavenged.push_back(ScavengedInfo()); + } // Avoid infinite regress - ScavengedReg = SReg; + Scavenged[SI].Reg = SReg; // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { // Spill the scavenged register before I. - assert(ScavengingFrameIndex >= 0 && + assert(Scavenged[SI].FrameIndex >= 0 && "Cannot scavenge register without an emergency spill slot!"); - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); + TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, + RC, TRI); MachineBasicBlock::iterator II = prior(I); unsigned FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, + RC, TRI); II = prior(UseMI); FIOperandNum = getFrameIndexOperandNum(II); TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } - ScavengeRestore = prior(UseMI); + Scavenged[SI].Restore = prior(UseMI); // Doing this here leads to infinite regress. - // ScavengedReg = SReg; - ScavengedRC = RC; + // Scavenged[SI].Reg = SReg; DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 716fb93b29..ff98a04c5b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4496,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + if (!VT.isVector() && (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, DAG.getSetCC(N->getDebugLoc(), TLI.getSetCCResultType(VT), @@ -5835,14 +5835,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // No FP constant should be created after legalization as Instruction + // Selection pass has hard time in dealing with FP constant. + // + // We don't need test this condition for transformation like following, as + // the DAG being transformed implies it is legal to take FP constant as + // operand. + // + // (fadd (fmul c, x), x) -> (fmul c+1, x) + // + bool AllowNewFpConst = (Level < AfterLegalizeDAG); + // If allow, fold (fadd (fneg x), x) -> 0.0 - if (DAG.getTarget().Options.UnsafeFPMath && + if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { return DAG.getConstantFP(0.0, VT); } // If allow, fold (fadd x, (fneg x)) -> 0.0 - if (DAG.getTarget().Options.UnsafeFPMath && + if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { return DAG.getConstantFP(0.0, VT); } @@ -5944,7 +5955,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N0.getOpcode() == ISD::FADD) { + if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul 3.0, x) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && @@ -5954,7 +5965,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N1.getOpcode() == ISD::FADD) { + if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul 3.0, x) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && @@ -5965,7 +5976,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) - if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + if (AllowNewFpConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -6811,9 +6823,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { MVT::Other, Chain, Tmp, N2); } - // visitXOR has changed XOR's operands. - Op0 = TheXor->getOperand(0); - Op1 = TheXor->getOperand(1); + // visitXOR has changed XOR's operands or replaced the XOR completely, + // bail out. + return SDValue(N, 0); } } @@ -7699,16 +7711,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -/// Returns the base pointer and an integer offset from that object. -static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) { - if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) { - int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); - SDValue Base = Ptr->getOperand(0); - return std::make_pair(Base, Offset); +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; } - return std::make_pair(Ptr, 0); -} + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr) { + bool IsIndexSignExt = false; + + // Just Base or possibly anything else. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Base + offset. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, + IsIndexSignExt); + } + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + } +}; /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. @@ -7755,16 +7833,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; - // This holds the base pointer and the offset in bytes from the base pointer. - std::pair<SDValue, int64_t> BasePtr = - GetPointerBaseAndOffset(St->getBasePtr()); + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. - if (!BasePtr.first.getNode()) + if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. - if (BasePtr.first.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.getOpcode() == ISD::UNDEF) return false; // Save the LoadSDNodes that we find in the chain. @@ -7786,11 +7864,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - std::pair<SDValue, int64_t> Ptr = - GetPointerBaseAndOffset(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); // Check that the base pointer is the same as the original one. - if (Ptr.first.getNode() != BasePtr.first.getNode()) + if (!Ptr.equalBaseIndex(BasePtr)) break; // Check that the alignment is the same. @@ -7816,7 +7893,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // We found a potential memory operand to merge. - StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); + StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); // Find the next memory operand in the chain. If the next operand in the // chain is a store then move up and continue the scan with the next @@ -8013,7 +8090,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. - SDValue LdBasePtr; + BaseIndexOffset LdBasePtr; for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); @@ -8039,21 +8116,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - std::pair<SDValue, int64_t> LdPtr = - GetPointerBaseAndOffset(Ld->getBasePtr()); - + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); // If this is not the first ptr that we check. - if (LdBasePtr.getNode()) { + if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. - if (LdPtr.first != LdBasePtr) + if (!LdPtr.equalBaseIndex(LdBasePtr)) break; } else { // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr.first; + LdBasePtr = LdPtr; } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); } if (LoadNodes.size() < 2) @@ -8978,12 +9053,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + if (V->getOperand(0).getValueType() != NVT) + return SDValue(); + unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + + // Skip bitcasting + if (V->getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + DebugLoc dl = N->getDebugLoc(); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); EVT SmallVT = V->getOperand(1).getValueType(); - if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); // Only handle cases where both indexes are constants with the same type. @@ -8996,30 +9091,18 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) // Into: - // indices are equal => V1 + // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getNode(ISD::BITCAST, dl, + N->getOperand(0).getValueType(), + V->getOperand(0)), N->getOperand(1)); } } - if (V->getOpcode() == ISD::CONCAT_VECTORS) { - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. - if (V->getOperand(0).getValueType() != NVT) - return SDValue(); - unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); - unsigned NumElems = NVT.getVectorNumElements(); - assert((Idx % NumElems) == 0 && - "IDX in concat is not a multiple of the result vector length."); - return V->getOperand(Idx / NumElems); - } - return SDValue(); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 10e2dc6149..9ac738e507 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1183,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); + assert (MaterialReg != 0 && "Unable to materialize imm."); + if (MaterialReg == 0) return 0; } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index e26d1656e8..b6436bf427 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already split"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -854,6 +875,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } @@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) + for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); + // Propagate node ordering + DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); + } return true; } diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index d2269f8acc..7e7b8974be 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -33,8 +33,10 @@ class SDNodeOrdering { public: SDNodeOrdering() {} - void add(const SDNode *Node, unsigned O) { - OrderMap[Node] = O; + void add(const SDNode *Node, unsigned NewOrder) { + unsigned &OldOrder = OrderMap[Node]; + if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)) + OldOrder = NewOrder; } void remove(const SDNode *Node) { DenseMap<const SDNode*, unsigned>::iterator Itr = OrderMap.find(Node); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 35707e86ce..64244313a3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1917,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, } case ISD::LOAD: { LoadSDNode *LD = cast<LoadSDNode>(Op); - if (ISD::isZEXTLoad(Op.getNode())) { + // If this is a ZEXTLoad and we are looking at the loaded value. + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); @@ -2287,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ break; } - // Handle LOADX separately here. EXTLOAD case will fallthrough. - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { - unsigned ExtType = LD->getExtensionType(); - switch (ExtType) { - default: break; - case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp+1; - case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp; + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. EXTLOAD case will fallthrough. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 33d100eb3a..ce40cd6a0c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4914,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isOperationLegalOrCustom(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){ setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 39a1f8a3d0..eeea9e4cfc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -362,6 +362,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TargetSubtargetInfo &ST = const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); ST.resetSubtargetFeatures(MF); + TM.resetTargetOptions(MF); DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -784,8 +785,12 @@ void SelectionDAGISel::DoInstructionSelection() { if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. - if (ResNode) + if (ResNode) { + // Propagate ordering + CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); + ReplaceUses(Node, ResNode); + } // If after the replacement this node is not used any more, // remove this dead node. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 320128a999..c5bbba3ffc 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -29,6 +29,7 @@ #define DEBUG_TYPE "spillplacement" #include "SpillPlacement.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index ec44b8cb59..a789a2596d 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -67,14 +67,14 @@ DisableColoring("no-stack-coloring", /// code. If this flag is enabled, we try to save the user. static cl::opt<bool> ProtectFromEscapedAllocas("protect-from-escaped-allocas", - cl::init(false), cl::Hidden, - cl::desc("Do not optimize lifetime zones that are broken")); + cl::init(false), cl::Hidden, + cl::desc("Do not optimize lifetime zones that " + "are broken")); STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); -STATISTIC(EscapedAllocas, - "Number of allocas that escaped the lifetime region"); +STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); //===----------------------------------------------------------------------===// // StackColoring Pass @@ -577,7 +577,7 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { SlotIndex Index = Indexes->getInstructionIndex(I); LiveInterval *Interval = Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && - "Found instruction usage outside of live range."); + "Found instruction usage outside of live range."); } #endif @@ -741,9 +741,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { std::stable_sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI)); - bool Chanded = true; - while (Chanded) { - Chanded = false; + bool Changed = true; + while (Changed) { + Changed = false; for (unsigned I = 0; I < NumSlots; ++I) { if (SortedSlots[I] == -1) continue; @@ -760,7 +760,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Merge disjoint slots. if (!First->overlaps(*Second)) { - Chanded = true; + Changed = true; First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 3c346766ad..f42bdbd276 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -710,7 +710,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = MaxStoresPerMemmoveOptSize = 4; - BenefitFromCodePlacementOpt = false; UseUnderscoreSetJmp = false; UseUnderscoreLongJmp = false; SelectIsExpensive = false; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 9a9ddc9b48..3bdca4c640 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -742,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; - if (Kind.isThreadLocal()) - return ".tls$"; + if (Kind.isThreadLocal()) { + // 'LLVM' is just an arbitary string to ensure that the section name gets + // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker. + return ".tls$LLVM"; + } if (Kind.isWriteable()) return ".data$"; return ".rdata$"; diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index f31f67d58c..783bfa1c1a 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency( report_fatal_error(ss.str()); } #endif - return DefMI->isTransient() ? 0 : 1; + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp index 2efbfd1f92..7dff9ff49a 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -16,7 +16,7 @@ using namespace llvm; void DWARFDebugArangeSet::clear() { Offset = -1U; - std::memset(&Header, 0, sizeof(Header)); + std::memset(&HeaderData, 0, sizeof(Header)); ArangeDescriptors.clear(); } @@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { // descriptor on the target system. This header is followed by a series // of tuples. Each tuple consists of an address and a length, each in // the size appropriate for an address on the target architecture. - Header.Length = data.getU32(offset_ptr); - Header.Version = data.getU16(offset_ptr); - Header.CuOffset = data.getU32(offset_ptr); - Header.AddrSize = data.getU8(offset_ptr); - Header.SegSize = data.getU8(offset_ptr); + HeaderData.Length = data.getU32(offset_ptr); + HeaderData.Version = data.getU16(offset_ptr); + HeaderData.CuOffset = data.getU32(offset_ptr); + HeaderData.AddrSize = data.getU8(offset_ptr); + HeaderData.SegSize = data.getU8(offset_ptr); // Perform basic validation of the header fields. - if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) || - (Header.AddrSize != 4 && Header.AddrSize != 8)) { + if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) || + (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) { clear(); return false; } @@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { // size of an address). The header is padded, if necessary, to the // appropriate boundary. const uint32_t header_size = *offset_ptr - Offset; - const uint32_t tuple_size = Header.AddrSize * 2; + const uint32_t tuple_size = HeaderData.AddrSize * 2; uint32_t first_tuple_offset = 0; while (first_tuple_offset < header_size) first_tuple_offset += tuple_size; @@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { Descriptor arangeDescriptor; assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length)); - assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize); + assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize); while (data.isValidOffset(*offset_ptr)) { - arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize); - arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize); + arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize); + arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize); // Each set of tuples is terminated by a 0 for the address and 0 // for the length. @@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { void DWARFDebugArangeSet::dump(raw_ostream &OS) const { OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ", - Header.Length, Header.Version) + HeaderData.Length, HeaderData.Version) << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n", - Header.CuOffset, Header.AddrSize, Header.SegSize); + HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize); - const uint32_t hex_width = Header.AddrSize * 2; + const uint32_t hex_width = HeaderData.AddrSize * 2; for (DescriptorConstIter pos = ArangeDescriptors.begin(), end = ArangeDescriptors.end(); pos != end; ++pos) OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address) @@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const { std::find_if(ArangeDescriptors.begin(), end, // Range DescriptorContainsAddress(address)); // Predicate if (pos != end) - return Header.CuOffset; + return HeaderData.CuOffset; return -1U; } diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h index 9a2a6d0f00..d76867615a 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.h +++ b/lib/DebugInfo/DWARFDebugArangeSet.h @@ -48,7 +48,7 @@ private: typedef DescriptorColl::const_iterator DescriptorConstIter; uint32_t Offset; - Header Header; + Header HeaderData; DescriptorColl ArangeDescriptors; public: @@ -58,11 +58,11 @@ public: bool extract(DataExtractor data, uint32_t *offset_ptr); void dump(raw_ostream &OS) const; - uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; } - uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; } + uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } + uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; } uint32_t findAddress(uint64_t address) const; uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); } - const struct Header &getHeader() const { return Header; } + const struct Header &getHeader() const { return HeaderData; } const Descriptor *getDescriptor(uint32_t i) const { if (i < ArangeDescriptors.size()) return &ArangeDescriptors[i]; diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index b077eb5e38..f79862d606 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -186,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const { Range range(address); RangeCollIterator begin = Aranges.begin(); RangeCollIterator end = Aranges.end(); - RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan); + RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan); if (pos != end && pos->LoPC <= address && address < pos->HiPC()) { return pos->Offset; diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 3d59d251a0..906a3a3fda 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -535,6 +535,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { if (isa<UndefValue>(C)) { GenericValue Result; switch (C->getType()->getTypeID()) { + default: + break; case Type::IntegerTyID: case Type::X86_FP80TyID: case Type::FP128TyID: @@ -543,7 +545,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { // with the correct bit width. Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); break; - default: + case Type::VectorTyID: + // if the whole vector is 'undef' just reserve memory for the value. + const VectorType* VTy = dyn_cast<VectorType>(C->getType()); + const Type *ElemTy = VTy->getElementType(); + unsigned int elemNum = VTy->getNumElements(); + Result.AggregateVal.resize(elemNum); + if (ElemTy->isIntegerTy()) + for (unsigned int i = 0; i < elemNum; ++i) + Result.AggregateVal[i].IntVal = + APInt(ElemTy->getPrimitiveSizeInBits(), 0); break; } return Result; @@ -825,6 +836,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { else llvm_unreachable("Unknown constant pointer type!"); break; + case Type::VectorTyID: { + unsigned elemNum; + Type* ElemTy; + const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); + const ConstantVector *CV = dyn_cast<ConstantVector>(C); + const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C); + + if (CDV) { + elemNum = CDV->getNumElements(); + ElemTy = CDV->getElementType(); + } else if (CV || CAZ) { + VectorType* VTy = dyn_cast<VectorType>(C->getType()); + elemNum = VTy->getNumElements(); + ElemTy = VTy->getElementType(); + } else { + llvm_unreachable("Unknown constant vector type!"); + } + + Result.AggregateVal.resize(elemNum); + // Check if vector holds floats. + if(ElemTy->isFloatTy()) { + if (CAZ) { + GenericValue floatZero; + floatZero.FloatVal = 0.f; + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + floatZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa<UndefValue>(CV->getOperand(i))) + Result.AggregateVal[i].FloatVal = cast<ConstantFP>( + CV->getOperand(i))->getValueAPF().convertToFloat(); + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i); + + break; + } + // Check if vector holds doubles. + if (ElemTy->isDoubleTy()) { + if (CAZ) { + GenericValue doubleZero; + doubleZero.DoubleVal = 0.0; + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + doubleZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa<UndefValue>(CV->getOperand(i))) + Result.AggregateVal[i].DoubleVal = cast<ConstantFP>( + CV->getOperand(i))->getValueAPF().convertToDouble(); + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i); + + break; + } + // Check if vector holds integers. + if (ElemTy->isIntegerTy()) { + if (CAZ) { + GenericValue intZero; + intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull); + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + intZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa<UndefValue>(CV->getOperand(i))) + Result.AggregateVal[i].IntVal = cast<ConstantInt>( + CV->getOperand(i))->getValue(); + else { + Result.AggregateVal[i].IntVal = + APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0); + } + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].IntVal = APInt( + CDV->getElementType()->getPrimitiveSizeInBits(), + CDV->getElementAsInteger(i)); + + break; + } + llvm_unreachable("Unknown constant pointer type!"); + } + break; + default: SmallString<256> Msg; raw_svector_ostream OS(Msg); @@ -866,6 +972,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty); switch (Ty->getTypeID()) { + default: + dbgs() << "Cannot store value of type " << *Ty << "!\n"; + break; case Type::IntegerTyID: StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes); break; @@ -885,8 +994,19 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((PointerTy*)Ptr) = Val.PointerVal; break; - default: - dbgs() << "Cannot store value of type " << *Ty << "!\n"; + case Type::VectorTyID: + for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) { + if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal; + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) + *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal; + if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) { + unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8; + StoreIntToMemory(Val.AggregateVal[i].IntVal, + (uint8_t*)Ptr + numOfBytes*i, numOfBytes); + } + } + break; } if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian()) @@ -951,6 +1071,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, Result.IntVal = APInt(80, y); break; } + case Type::VectorTyID: { + const VectorType *VT = cast<VectorType>(Ty); + const Type *ElemT = VT->getElementType(); + const unsigned numElems = VT->getNumElements(); + if (ElemT->isFloatTy()) { + Result.AggregateVal.resize(numElems); + for (unsigned i = 0; i < numElems; ++i) + Result.AggregateVal[i].FloatVal = *((float*)Ptr+i); + } + if (ElemT->isDoubleTy()) { + Result.AggregateVal.resize(numElems); + for (unsigned i = 0; i < numElems; ++i) + Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i); + } + if (ElemT->isIntegerTy()) { + GenericValue intZero; + const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth(); + intZero.IntVal = APInt(elemBitWidth, 0); + Result.AggregateVal.resize(numElems, intZero); + for (unsigned i = 0; i < numElems; ++i) + LoadIntFromMemory(Result.AggregateVal[i].IntVal, + (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8); + } + break; + } default: SmallString<256> Msg; raw_svector_ostream OS(Msg); diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index ec4f7f6813..526c04e082 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -1187,6 +1187,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { ++VAList.UIntPairVal.second; } +void Interpreter::visitExtractElementInst(ExtractElementInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + + Type *Ty = I.getType(); + const unsigned indx = unsigned(Src2.IntVal.getZExtValue()); + + if(Src1.AggregateVal.size() > indx) { + switch (Ty->getTypeID()) { + default: + dbgs() << "Unhandled destination type for extractelement instruction: " + << *Ty << "\n"; + llvm_unreachable(0); + break; + case Type::IntegerTyID: + Dest.IntVal = Src1.AggregateVal[indx].IntVal; + break; + case Type::FloatTyID: + Dest.FloatVal = Src1.AggregateVal[indx].FloatVal; + break; + case Type::DoubleTyID: + Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal; + break; + } + } else { + dbgs() << "Invalid index in extractelement instruction\n"; + } + + SetValue(&I, Dest, SF); +} + GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, ExecutionContext &SF) { switch (CE->getOpcode()) { diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index e95db2fc4e..2952d7eabe 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -178,6 +178,7 @@ public: void visitAShr(BinaryOperator &I); void visitVAArgInst(VAArgInst &I); + void visitExtractElementInst(ExtractElementInst &I); void visitInstruction(Instruction &I) { errs() << I << "\n"; llvm_unreachable("Instruction not interpretable yet!"); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index ed2bf05e90..2d828914cd 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -649,6 +649,13 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, return addAttributes(C, Idx, AttributeSet::get(C, Idx, Attr)); } +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Idx, + StringRef Kind) const { + llvm::AttrBuilder B; + B.addAttribute(Kind); + return addAttributes(C, Idx, AttributeSet::get(C, Idx, B)); +} + AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Idx, AttributeSet Attrs) const { if (!pImpl) return Attrs; diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 0c7effb5ca..1abb656435 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -47,6 +47,16 @@ bool Constant::isNegativeZeroValue() const { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) return CFP->isZero() && CFP->isNegative(); + // Equivalent for a vector of -0.0's. + if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null<ConstantFP>(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) + return true; + + // We've already handled true FP case; any other FP vectors can't represent -0.0. + if (getType()->isFPOrFPVectorTy()) + return false; + // Otherwise, just use +0.0. return isNullValue(); } diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 5ee36abc6b..0d18bed001 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -71,6 +71,16 @@ static MDNode *getNonCompileUnitScope(MDNode *N) { return N; } +static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename, + StringRef Directory) { + assert(!Filename.empty() && "Unable to create file without name"); + Value *Pair[] = { + MDString::get(VMContext, Filename), + MDString::get(VMContext, Directory), + }; + return MDNode::get(VMContext, Pair); +} + /// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, @@ -93,13 +103,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + createFilePathPair(VMContext, Filename, Directory), ConstantInt::get(Type::getInt32Ty(VMContext), Lang), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), MDString::get(VMContext, Producer), - // isMain field can be removed when we remove the legacy debug info. - ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer), @@ -119,13 +125,9 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, /// createFile - Create a file descriptor to hold debugging information /// for a file. DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { - assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); - assert(!Filename.empty() && "Unable to create file without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_file_type), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), - NULL // TheCU + createFilePathPair(VMContext, Filename, Directory) }; return DIFile(MDNode::get(VMContext, Elts)); } @@ -148,9 +150,9 @@ DIType DIBuilder::createNullPtrType(StringRef Name) { // ,size, alignment, offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), + NULL, // Filename NULL, //TheCU, MDString::get(VMContext, Name), - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -171,9 +173,9 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, // offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_base_type), + NULL, // File/directory name NULL, //TheCU, MDString::get(VMContext, Name), - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -190,9 +192,9 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), + NULL, // Filename NULL, //TheCU, MDString::get(VMContext, StringRef()), // Empty name. - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -210,9 +212,9 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), + NULL, // Filename NULL, //TheCU, MDString::get(VMContext, Name), - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -227,9 +229,9 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, DIType Base) // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), + NULL, // Filename NULL, //TheCU, NULL, - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), @@ -248,9 +250,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), + NULL, // Filename NULL, // TheCU, NULL, // Name - NULL, // Filename ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -268,9 +270,9 @@ DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, assert(Ty.Verify() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -288,9 +290,9 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { assert(FriendTy.Verify() && "Invalid friend type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_friend), + NULL, Ty, NULL, // Name - Ty.getFile(), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -309,9 +311,9 @@ DIDerivedType DIBuilder::createInheritance( // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), + NULL, Ty, NULL, // Name - Ty.getFile(), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -330,9 +332,9 @@ DIDerivedType DIBuilder::createMemberType( // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -353,9 +355,9 @@ DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name, Flags |= DIDescriptor::FlagStaticMember; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/), ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/), @@ -379,9 +381,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name, // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(File), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -406,9 +408,9 @@ DIType DIBuilder::createObjCIVar(StringRef Name, // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), + File.getFileNode(), getNonCompileUnitScope(File), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -480,21 +482,23 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, } /// createClassType - Create debugging information entry for a class. -DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType DerivedFrom, DIArray Elements, - MDNode *VTableHolder, - MDNode *TemplateParams) { +DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, + unsigned Flags, DIType DerivedFrom, + DIArray Elements, + MDNode *VTableHolder, + MDNode *TemplateParams) { assert((!Context || Context.Verify()) && "createClassType should be called with a valid Context"); // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -506,7 +510,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, VTableHolder, TemplateParams }; - DIType R(MDNode::get(VMContext, Elts)); + DICompositeType R(MDNode::get(VMContext, Elts)); assert(R.Verify() && "createClassType should return a verifiable DIType"); return R; } @@ -524,9 +528,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context, // TAG_structure_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -551,9 +555,9 @@ DICompositeType DIBuilder::createUnionType( // TAG_union_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_union_type), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -574,8 +578,8 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), Constant::getNullValue(Type::getInt32Ty(VMContext)), - MDString::get(VMContext, ""), Constant::getNullValue(Type::getInt32Ty(VMContext)), + MDString::get(VMContext, ""), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), 0), @@ -598,9 +602,9 @@ DICompositeType DIBuilder::createEnumerationType( // TAG_enumeration_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -622,9 +626,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), + NULL, // Filename/Directory, NULL, //TheCU, MDString::get(VMContext, ""), - NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -645,9 +649,9 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, // A vector is an array type with the FlagVector flag applied. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), + NULL, // Filename/Directory, NULL, //TheCU, MDString::get(VMContext, ""), - NULL, //TheCU, ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -725,29 +729,6 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() { return DIDescriptor(MDNode::get(VMContext, Elts)); } -/// createTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::createTemporaryType() { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - -/// createTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::createTemporaryType(DIFile F) { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { - GetTagConstant(VMContext, DW_TAG_base_type), - TheCU, - NULL, - F - }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts); - return DIType(Node); -} - /// createForwardDecl - Create a temporary forward-declared type that /// can be RAUW'd if the full type is seen. DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, @@ -758,9 +739,9 @@ DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, // Create a temporary MDNode. Value *Elts[] = { GetTagConstant(VMContext, Tag), + F.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - F, ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), @@ -798,17 +779,18 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { return DISubrange(MDNode::get(VMContext, Elts)); } -/// createGlobalVariable - Create a new descriptor for the specified global. +/// \brief Create a new descriptor for the specified global. DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, Value *Val) { +createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F, + unsigned LineNumber, DIType Ty, bool isLocalToUnit, + Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), Constant::getNullValue(Type::getInt32Ty(VMContext)), NULL, // TheCU, MDString::get(VMContext, Name), MDString::get(VMContext, Name), - MDString::get(VMContext, Name), + MDString::get(VMContext, LinkageName), F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), Ty, @@ -822,6 +804,14 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, return DIGlobalVariable(Node); } +/// \brief Create a new descriptor for the specified global. +DIGlobalVariable DIBuilder:: +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, Value *Val) { + return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit, + Val); +} + /// createStaticVariable - Create a new descriptor for the specified static /// variable. DIGlobalVariable DIBuilder:: @@ -920,12 +910,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + File.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), @@ -946,7 +935,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, // Create a named metadata so that we do not lose this mdnode. if (isDefinition) AllSubprograms.push_back(Node); - return DISubprogram(Node); + DISubprogram S(Node); + assert(S.Verify() && "createFunction should return a valid DISubprogram"); + return S; } /// createMethod - Create a new descriptor for the specified C++ method. @@ -966,12 +957,11 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + F.getFileNode(), getNonCompileUnitScope(Context), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), - F, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), @@ -991,7 +981,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, MDNode *Node = MDNode::get(VMContext, Elts); if (isDefinition) AllSubprograms.push_back(Node); - return DISubprogram(Node); + DISubprogram S(Node); + assert(S.Verify() && "createMethod should return a valid DISubprogram"); + return S; } /// createNameSpace - This creates new descriptor for a namespace @@ -1000,9 +992,9 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_namespace), + File.getFileNode(), getNonCompileUnitScope(Scope), MDString::get(VMContext, Name), - File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; DINameSpace R(MDNode::get(VMContext, Elts)); @@ -1017,8 +1009,8 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, DIFile File) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), - Scope, - File + File.getFileNode(), + Scope }; DILexicalBlockFile R(MDNode::get(VMContext, Elts)); assert( @@ -1033,10 +1025,10 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, static unsigned int unique_id = 0; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), + File.getFileNode(), getNonCompileUnitScope(Scope), ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt32Ty(VMContext), Col), - File, ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; DILexicalBlock R(MDNode::get(VMContext, Elts)); diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index f09de3a731..ecd5216f20 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -438,6 +438,12 @@ DataLayout::~DataLayout() { delete static_cast<StructLayoutMap*>(LayoutMap); } +bool DataLayout::doFinalization(Module &M) { + delete static_cast<StructLayoutMap*>(LayoutMap); + LayoutMap = 0; + return false; +} + const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { if (!LayoutMap) LayoutMap = new StructLayoutMap(); @@ -504,47 +510,6 @@ std::string DataLayout::getStringRepresentation() const { } -uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const { - assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!"); - switch (Ty->getTypeID()) { - case Type::LabelTyID: - return getPointerSizeInBits(0); - case Type::PointerTyID: { - unsigned AS = dyn_cast<PointerType>(Ty)->getAddressSpace(); - return getPointerSizeInBits(AS); - } - case Type::ArrayTyID: { - ArrayType *ATy = cast<ArrayType>(Ty); - return getTypeAllocSizeInBits(ATy->getElementType())*ATy->getNumElements(); - } - case Type::StructTyID: - // Get the layout annotation... which is lazily created on demand. - return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); - case Type::IntegerTyID: - return cast<IntegerType>(Ty)->getBitWidth(); - case Type::HalfTyID: - return 16; - case Type::FloatTyID: - return 32; - case Type::DoubleTyID: - case Type::X86_MMXTyID: - return 64; - case Type::PPC_FP128TyID: - case Type::FP128TyID: - return 128; - // In memory objects this is always aligned to a higher boundary, but - // only 80 bits contain information. - case Type::X86_FP80TyID: - return 80; - case Type::VectorTyID: { - VectorType *VTy = cast<VectorType>(Ty); - return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType()); - } - default: - llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type"); - } -} - /*! \param abi_or_pref Flag that determines which alignment is returned. true returns the ABI alignment, false returns the preferred alignment. @@ -656,6 +621,13 @@ Type *DataLayout::getIntPtrType(Type *Ty) const { return IntTy; } +Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const { + for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i) + if (Width <= LegalIntWidths[i]) + return Type::getIntNTy(C, LegalIntWidths[i]); + return 0; +} + uint64_t DataLayout::getIndexedOffset(Type *ptrTy, ArrayRef<Value *> Indices) const { Type *Ty = ptrTy; diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index e85d4adf77..0ffe99d704 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::dwarf; @@ -66,18 +67,28 @@ bool DIDescriptor::Verify() const { DITemplateValueParameter(DbgNode).Verify()); } -StringRef -DIDescriptor::getStringField(unsigned Elt) const { - if (DbgNode == 0) - return StringRef(); +static Value *getField(const MDNode *DbgNode, unsigned Elt) { + if (DbgNode == 0 || Elt >= DbgNode->getNumOperands()) + return 0; + return DbgNode->getOperand(Elt); +} - if (Elt < DbgNode->getNumOperands()) - if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt))) - return MDS->getString(); +static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { + if (const MDNode *R = dyn_cast_or_null<MDNode>(getField(DbgNode, Elt))) + return R; + return 0; +} +static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) { + if (MDString *MDS = dyn_cast_or_null<MDString>(getField(DbgNode, Elt))) + return MDS->getString(); return StringRef(); } +StringRef DIDescriptor::getStringField(unsigned Elt) const { + return ::getStringField(DbgNode, Elt); +} + uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { if (DbgNode == 0) return 0; @@ -321,7 +332,7 @@ bool DIDescriptor::isEnumerator() const { return DbgNode && getTag() == dwarf::DW_TAG_enumerator; } -/// isObjCProperty - Return true if the specified tag is DW_TAG +/// isObjCProperty - Return true if the specified tag is DW_TAG_APPLE_property. bool DIDescriptor::isObjCProperty() const { return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; } @@ -407,7 +418,7 @@ bool DICompileUnit::Verify() const { if (N.empty()) return false; // It is possible that directory and produce string is empty. - return DbgNode->getNumOperands() == 15; + return DbgNode->getNumOperands() == 12; } /// Verify - Verify that an ObjC property is well formed. @@ -475,7 +486,7 @@ bool DISubprogram::Verify() const { DICompositeType Ty = getType(); if (!Ty.Verify()) return false; - return DbgNode->getNumOperands() == 21; + return DbgNode->getNumOperands() == 20; } /// Verify - Verify that a global variable descriptor is well formed. @@ -529,9 +540,14 @@ bool DINameSpace::Verify() const { return DbgNode->getNumOperands() == 5; } +/// \brief Retrieve the MDNode for the directory/file pair. +MDNode *DIFile::getFileNode() const { + return const_cast<MDNode*>(getNodeField(DbgNode, 1)); +} + /// \brief Verify that the file descriptor is well formed. bool DIFile::Verify() const { - return isFile() && DbgNode->getNumOperands() == 4; + return isFile() && DbgNode->getNumOperands() == 2; } /// \brief Verify that the enumerator descriptor is well formed. @@ -600,6 +616,25 @@ MDNode *DIDerivedType::getObjCProperty() const { return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)); } +/// \brief Set the array of member DITypes. +void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) { + assert((!TParams || DbgNode->getNumOperands() == 14) && + "If you're setting the template parameters this should include a slot " + "for that!"); + TrackingVH<MDNode> N(*this); + N->replaceOperandWith(10, Elements); + if (TParams) + N->replaceOperandWith(13, TParams); + DbgNode = N; +} + +/// \brief Set the containing type. +void DICompositeType::setContainingType(DICompositeType ContainingType) { + TrackingVH<MDNode> N(*this); + N->replaceOperandWith(12, ContainingType); + DbgNode = N; +} + /// isInlinedFnArgument - Return true if this variable provides debugging /// information for an inlined function arguments. bool DIVariable::isInlinedFnArgument(const Function *CurFn) { @@ -627,21 +662,21 @@ bool DISubprogram::describes(const Function *F) { unsigned DISubprogram::isOptimized() const { assert (DbgNode && "Invalid subprogram descriptor!"); - if (DbgNode->getNumOperands() == 16) - return getUnsignedField(15); + if (DbgNode->getNumOperands() == 15) + return getUnsignedField(14); return 0; } MDNode *DISubprogram::getVariablesNodes() const { - if (!DbgNode || DbgNode->getNumOperands() <= 19) + if (!DbgNode || DbgNode->getNumOperands() <= 18) return NULL; - return dyn_cast_or_null<MDNode>(DbgNode->getOperand(19)); + return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)); } DIArray DISubprogram::getVariables() const { - if (!DbgNode || DbgNode->getNumOperands() <= 19) + if (!DbgNode || DbgNode->getNumOperands() <= 18) return DIArray(); - if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(19))) + if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18))) return DIArray(T); return DIArray(); } @@ -649,76 +684,48 @@ DIArray DISubprogram::getVariables() const { StringRef DIScope::getFilename() const { if (!DbgNode) return StringRef(); - if (isLexicalBlockFile()) - return DILexicalBlockFile(DbgNode).getFilename(); - if (isLexicalBlock()) - return DILexicalBlock(DbgNode).getFilename(); - if (isSubprogram()) - return DISubprogram(DbgNode).getFilename(); - if (isCompileUnit()) - return DICompileUnit(DbgNode).getFilename(); - if (isNameSpace()) - return DINameSpace(DbgNode).getFilename(); - if (isType()) - return DIType(DbgNode).getFilename(); - if (isFile()) - return DIFile(DbgNode).getFilename(); - llvm_unreachable("Invalid DIScope!"); + return ::getStringField(getNodeField(DbgNode, 1), 0); } StringRef DIScope::getDirectory() const { if (!DbgNode) return StringRef(); - if (isLexicalBlockFile()) - return DILexicalBlockFile(DbgNode).getDirectory(); - if (isLexicalBlock()) - return DILexicalBlock(DbgNode).getDirectory(); - if (isSubprogram()) - return DISubprogram(DbgNode).getDirectory(); - if (isCompileUnit()) - return DICompileUnit(DbgNode).getDirectory(); - if (isNameSpace()) - return DINameSpace(DbgNode).getDirectory(); - if (isType()) - return DIType(DbgNode).getDirectory(); - if (isFile()) - return DIFile(DbgNode).getDirectory(); - llvm_unreachable("Invalid DIScope!"); + return ::getStringField(getNodeField(DbgNode, 1), 1); } DIArray DICompileUnit::getEnumTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getRetainedTypes() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getSubprograms() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9))) return DIArray(N); return DIArray(); } DIArray DICompileUnit::getGlobalVariables() const { - if (!DbgNode || DbgNode->getNumOperands() < 14) + if (!DbgNode || DbgNode->getNumOperands() < 12) return DIArray(); - if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(13))) + if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10))) return DIArray(N); return DIArray(); } @@ -1026,6 +1033,8 @@ void DIDescriptor::print(raw_ostream &OS) const { DIVariable(DbgNode).printInternal(OS); } else if (this->isObjCProperty()) { DIObjCProperty(DbgNode).printInternal(OS); + } else if (this->isNameSpace()) { + DINameSpace(DbgNode).printInternal(OS); } else if (this->isScope()) { DIScope(DbgNode).printInternal(OS); } @@ -1099,6 +1108,14 @@ void DICompositeType::printInternal(raw_ostream &OS) const { OS << " [" << A.getNumElements() << " elements]"; } +void DINameSpace::printInternal(raw_ostream &OS) const { + StringRef Name = getName(); + if (!Name.empty()) + OS << " [" << Name << ']'; + + OS << " [line " << getLineNumber() << ']'; +} + void DISubprogram::printInternal(raw_ostream &OS) const { // TODO : Print context OS << " [line " << getLineNumber() << ']'; diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index 5559a6c56e..1e72b90a13 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -211,7 +211,7 @@ Function::~Function() { clearGC(); // Remove the intrinsicID from the Cache. - if(getValueName() && isIntrinsic()) + if (getValueName() && isIntrinsic()) getContext().pImpl->IntrinsicIDCache.erase(this); } @@ -352,7 +352,7 @@ unsigned Function::getIntrinsicID() const { LLVMContextImpl::IntrinsicIDCacheTy &IntrinsicIDCache = getContext().pImpl->IntrinsicIDCache; - if(!IntrinsicIDCache.count(this)) { + if (!IntrinsicIDCache.count(this)) { unsigned Id = lookupIntrinsicID(); IntrinsicIDCache[this]=Id; return Id; diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 2e3a525826..d58877ef77 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -3000,8 +3000,8 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { uint32_t BitWidth = C.getBitWidth(); switch (pred) { default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!"); - case ICmpInst::ICMP_EQ: Upper++; break; - case ICmpInst::ICMP_NE: Lower++; break; + case ICmpInst::ICMP_EQ: ++Upper; break; + case ICmpInst::ICMP_NE: ++Lower; break; case ICmpInst::ICMP_ULT: Lower = APInt::getMinValue(BitWidth); // Check for an empty-set condition. @@ -3015,25 +3015,25 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) { return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_UGT: - Lower++; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) + ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max) // Check for an empty-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_SGT: - Lower++; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) + ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max) // Check for an empty-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/false); break; case ICmpInst::ICMP_ULE: - Lower = APInt::getMinValue(BitWidth); Upper++; + Lower = APInt::getMinValue(BitWidth); ++Upper; // Check for a full-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/true); break; case ICmpInst::ICMP_SLE: - Lower = APInt::getSignedMinValue(BitWidth); Upper++; + Lower = APInt::getSignedMinValue(BitWidth); ++Upper; // Check for a full-set condition. if (Lower == Upper) return ConstantRange(BitWidth, /*isFullSet=*/true); diff --git a/lib/IRReader/CMakeLists.txt b/lib/IRReader/CMakeLists.txt new file mode 100644 index 0000000000..cf10d8b7db --- /dev/null +++ b/lib/IRReader/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMIRReader + IRReader.cpp + ) diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp new file mode 100644 index 0000000000..fb87cbd8bf --- /dev/null +++ b/lib/IRReader/IRReader.cpp @@ -0,0 +1,78 @@ +//===---- IRReader.cpp - Reader for LLVM IR files -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IRReader/IRReader.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Assembly/Parser.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/system_error.h" + +using namespace llvm; + +Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, + LLVMContext &Context) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg); + if (M == 0) { + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + ErrMsg); + // ParseBitcodeFile does not take ownership of the Buffer in the + // case of an error. + delete Buffer; + } + return M; + } + + return ParseAssembly(Buffer, 0, Err, Context); +} + +Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context) { + OwningPtr<MemoryBuffer> File; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { + Err = SMDiagnostic(Filename, SourceMgr::DK_Error, + "Could not open input file: " + ec.message()); + return 0; + } + + return getLazyIRModule(File.take(), Err, Context); +} + +Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, + LLVMContext &Context) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg); + if (M == 0) + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + ErrMsg); + // ParseBitcodeFile does not take ownership of the Buffer. + delete Buffer; + return M; + } + + return ParseAssembly(Buffer, 0, Err, Context); +} + +Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, + LLVMContext &Context) { + OwningPtr<MemoryBuffer> File; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { + Err = SMDiagnostic(Filename, SourceMgr::DK_Error, + "Could not open input file: " + ec.message()); + return 0; + } + + return ParseIR(File.take(), Err, Context); +} diff --git a/lib/IRReader/LLVMBuild.txt b/lib/IRReader/LLVMBuild.txt new file mode 100644 index 0000000000..b7bc74d616 --- /dev/null +++ b/lib/IRReader/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/IRReader/LLVMBuild.txt -----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = IRReader +parent = Libraries +required_libraries = AsmParser BitReader Core Support diff --git a/lib/IRReader/Makefile b/lib/IRReader/Makefile new file mode 100644 index 0000000000..cf6bc11354 --- /dev/null +++ b/lib/IRReader/Makefile @@ -0,0 +1,14 @@ +##===- lib/IRReader/Makefile -------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME := LLVMIRReader +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index a31793c5fd..0565443806 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR MC Object Option Support TableGen Target Transforms +subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker IR IRReader MC Object Option Support TableGen Target Transforms [component_0] type = Group diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 0acbcfadaf..74cbdadd61 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -17,13 +17,13 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/TypeFinder.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp index c8ea8ff0a9..74d24f278b 100644 --- a/lib/Linker/Linker.cpp +++ b/lib/Linker/Linker.cpp @@ -15,7 +15,6 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" using namespace llvm; @@ -24,7 +23,6 @@ Linker::Linker(StringRef progname, StringRef modname, LLVMContext& C, unsigned flags): Context(C), Composite(new Module(modname, C)), - LibPaths(), Flags(flags), Error(), ProgramName(progname) { } @@ -32,7 +30,6 @@ Linker::Linker(StringRef progname, StringRef modname, Linker::Linker(StringRef progname, Module* aModule, unsigned flags) : Context(aModule->getContext()), Composite(aModule), - LibPaths(), Flags(flags), Error(), ProgramName(progname) { } @@ -63,27 +60,9 @@ Linker::verbose(StringRef message) { errs() << " " << message << "\n"; } -void -Linker::addPath(const sys::Path& path) { - LibPaths.push_back(path); -} - -void -Linker::addPaths(const std::vector<std::string>& paths) { - for (unsigned i = 0, e = paths.size(); i != e; ++i) - LibPaths.push_back(sys::Path(paths[i])); -} - -void -Linker::addSystemPaths() { - sys::Path::GetBitcodeLibraryPaths(LibPaths); - LibPaths.insert(LibPaths.begin(),sys::Path("./")); -} - Module* Linker::releaseModule() { Module* result = Composite; - LibPaths.clear(); Error.clear(); Composite = 0; Flags = 0; diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 26d378e6c0..9adcc02b71 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -304,8 +304,8 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked // to not be less than one. This needs to be change to be not less than zero. - std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; - std::vector<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID]; + SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; + SmallVectorImpl<StringRef>& MCDwarfDirs = MCDwarfDirsCUMap[CUID]; // Make space for this FileNumber in the MCDwarfFiles vector if needed. if (FileNumber >= MCDwarfFiles.size()) { MCDwarfFiles.resize(FileNumber + 1); @@ -366,7 +366,7 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, /// isValidDwarfFileNumber - takes a dwarf file number and returns true if it /// currently is assigned and false otherwise. bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { - std::vector<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; + SmallVectorImpl<MCDwarfFile *>& MCDwarfFiles = MCDwarfFilesCUMap[CUID]; if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size()) return false; diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index d3fa906a06..4766b37476 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -44,41 +44,49 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, // Get the assembler info needed to setup the MCContext. const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); - assert(MAI && "Unable to create target asm info!"); + if (!MAI) + return 0; const MCInstrInfo *MII = TheTarget->createMCInstrInfo(); - assert(MII && "Unable to create target instruction info!"); + if (!MII) + return 0; const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - assert(MRI && "Unable to create target register info!"); + if (!MRI) + return 0; // Package up features to be passed to target/subtarget std::string FeaturesStr; const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU, FeaturesStr); - assert(STI && "Unable to create subtarget info!"); + if (!STI) + return 0; // Set up the MCContext for creating symbols and MCExpr's. MCContext *Ctx = new MCContext(*MAI, *MRI, 0); - assert(Ctx && "Unable to create MCContext!"); + if (!Ctx) + return 0; // Set up disassembler. MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI); - assert(DisAsm && "Unable to create disassembler!"); + if (!DisAsm) + return 0; DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, *MAI, *MII, *MRI, *STI); - assert(IP && "Unable to create instruction printer!"); + if (!IP) + return 0; LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP); - assert(DC && "Allocation failure!"); + if (!DC) + return 0; return DC; } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index c81abe952a..0f8f0741bd 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -298,7 +298,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { // Put out the directory and file tables. // First the directory table. - const std::vector<StringRef> &MCDwarfDirs = + const SmallVectorImpl<StringRef> &MCDwarfDirs = context.getMCDwarfDirs(CUID); for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { MCOS->EmitBytes(MCDwarfDirs[i]); // the DirectoryName @@ -307,7 +307,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { MCOS->EmitIntValue(0, 1); // Terminate the directory list // Second the file table. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(CUID); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { MCOS->EmitBytes(MCDwarfFiles[i]->getName()); // FileName @@ -643,13 +643,13 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. - const std::vector<StringRef> &MCDwarfDirs = + const SmallVectorImpl<StringRef> &MCDwarfDirs = context.getMCDwarfDirs(); if (MCDwarfDirs.size() > 0) { MCOS->EmitBytes(MCDwarfDirs[0]); MCOS->EmitBytes("/"); } - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(); MCOS->EmitBytes(MCDwarfFiles[1]->getName()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 1a53934fef..cd4d144575 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -194,7 +194,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_TPOFF: return "TPOFF"; case VK_DTPOFF: return "DTPOFF"; case VK_TLVP: return "TLVP"; - case VK_SECREL: return "SECREL"; + case VK_SECREL: return "SECREL32"; case VK_ARM_NONE: return "(NONE)"; case VK_ARM_PLT: return "(PLT)"; case VK_ARM_GOT: return "(GOT)"; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 2e1a045569..0d32ad40e8 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -223,6 +223,12 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { } void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { + if (T.getArch() != Triple::mips && + T.getArch() != Triple::mipsel && + T.getArch() != Triple::mips64 && + T.getArch() != Triple::mips64el ) + FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + if (T.getArch() == Triple::x86) { PersonalityEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 @@ -230,15 +236,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { LSDAEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; - FDEEncoding = FDECFIEncoding = (RelocM == Reloc::PIC_) + FDEEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; TTypeEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; } else if (T.getArch() == Triple::x86_64) { - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - if (RelocM == Reloc::PIC_) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) @@ -261,8 +265,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } } else if (T.getArch() == Triple::aarch64) { - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. @@ -282,7 +284,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { } else if (T.getArch() == Triple::ppc64) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; - FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 6ab49ec92c..804734cea9 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -626,7 +626,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { return TokError("unmatched .ifs or .elses"); // Check to see there are no empty DwarfFile slots. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = getContext().getMCDwarfFiles(); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { if (!MCDwarfFiles[i]) @@ -1495,7 +1495,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // If we previously parsed a cpp hash file line comment then make sure the // current Dwarf File is for the CppHashFilename if not then emit the // Dwarf File table for it and adjust the line number for the .loc. - const std::vector<MCDwarfFile *> &MCDwarfFiles = + const SmallVectorImpl<MCDwarfFile *> &MCDwarfFiles = getContext().getMCDwarfFiles(); if (CppHashFilename.size() != 0) { if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != @@ -4105,12 +4105,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, MCParsedAsmOperand *Operand = Info.ParsedOperands[i]; // Immediate. - if (Operand->isImm()) { - if (Operand->needAsmRewrite()) - AsmStrRewrites.push_back(AsmRewrite(AOK_ImmPrefix, - Operand->getStartLoc())); + if (Operand->isImm()) continue; - } // Register operand. if (Operand->isReg() && !Operand->needAddressOf()) { @@ -4131,11 +4127,6 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, continue; bool isOutput = (i == 1) && Desc.mayStore(); - if (Operand->isMem() && Operand->needSizeDirective()) - AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective, - Operand->getStartLoc(), /*Len*/0, - Operand->getMemSize())); - if (isOutput) { ++InputIdx; OutputDecls.push_back(OpDecl); @@ -4184,28 +4175,31 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, // Build the IR assembly string. std::string AsmStringIR; - AsmRewriteKind PrevKind = AOK_Imm; raw_string_ostream OS(AsmStringIR); - const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart(); + const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart(); + const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd(); array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort); for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { const char *Loc = (*I).Loc.getPointer(); - assert(Loc >= Start && "Expected Loc to be after Start!"); + assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); unsigned AdditionalSkip = 0; AsmRewriteKind Kind = (*I).Kind; - // Emit everything up to the immediate/expression. If the previous rewrite - // was a size directive, then this has already been done. - if (PrevKind != AOK_SizeDirective) - OS << StringRef(Start, Loc - Start); - PrevKind = Kind; + // Emit everything up to the immediate/expression. + unsigned Len = Loc - AsmStart; + if (Len) { + // For Input/Output operands we need to remove the brackets, if present. + if ((Kind == AOK_Input || Kind == AOK_Output) && Loc[-1] == '[') + --Len; + OS << StringRef(AsmStart, Len); + } // Skip the original expression. if (Kind == AOK_Skip) { - Start = Loc + (*I).Len; + AsmStart = Loc + (*I).Len; continue; } @@ -4254,14 +4248,17 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, } // Skip the original expression. - if (Kind != AOK_SizeDirective) - Start = Loc + (*I).Len + AdditionalSkip; + AsmStart = Loc + (*I).Len + AdditionalSkip; + + // For Input/Output operands we need to remove the brackets, if present. + if ((Kind == AOK_Input || Kind == AOK_Output) && AsmStart != AsmEnd && + *AsmStart == ']') + ++AsmStart; } // Emit the remainder of the asm string. - const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd(); - if (Start != AsmEnd) - OS << StringRef(Start, AsmEnd - Start); + if (AsmStart != AsmEnd) + OS << StringRef(AsmStart, AsmEnd - AsmStart); AsmString = OS.str(); return false; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 51ef415542..d02e5535bd 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -24,7 +24,7 @@ using namespace llvm; MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { - const MCSection *section = NULL; + const MCSection *section = 0; SectionStack.push_back(std::make_pair(section, section)); } @@ -40,7 +40,7 @@ void MCStreamer::reset() { EmitDebugFrame = false; CurrentW64UnwindInfo = 0; LastSymbol = 0; - const MCSection *section = NULL; + const MCSection *section = 0; SectionStack.clear(); SectionStack.push_back(std::make_pair(section, section)); } @@ -172,7 +172,7 @@ void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() { if (FrameInfos.empty()) - return NULL; + return 0; return &FrameInfos.back(); } @@ -473,7 +473,7 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); - MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, NULL, Register, Offset); + MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset); CurFrame->LastFrameInst = CurFrame->Instructions.size(); CurFrame->Instructions.push_back(Inst); } @@ -623,5 +623,5 @@ void MCStreamer::Finish() { MCSymbolData &MCStreamer::getOrCreateSymbolData(MCSymbol *Symbol) { report_fatal_error("Not supported!"); - return *(static_cast<MCSymbolData*> (NULL)); + return *(static_cast<MCSymbolData*>(0)); } diff --git a/lib/Makefile b/lib/Makefile index 043eda6b99..57f016bc89 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,8 @@ LEVEL = .. include $(LEVEL)/Makefile.config PARALLEL_DIRS := IR AsmParser Bitcode Archive Analysis Transforms CodeGen \ - Target ExecutionEngine Linker MC Object Option DebugInfo + Target ExecutionEngine Linker MC Object Option DebugInfo \ + IRReader include $(LEVEL)/Makefile.common diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 5b68fbb270..6182e34150 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -3311,10 +3311,8 @@ namespace { significand = significand.udiv(divisor); - // Truncate the significand down to its active bit count, but - // don't try to drop below 32. - unsigned newPrecision = std::max(32U, significand.getActiveBits()); - significand = significand.trunc(newPrecision); + // Truncate the significand down to its active bit count. + significand = significand.trunc(significand.getActiveBits()); } diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 07cb057b48..e8534753b4 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -559,12 +559,12 @@ bool APInt::slt(const APInt& RHS) const { if (lhsNeg) { // Sign bit is set so perform two's complement to make it positive lhs.flipAllBits(); - lhs++; + ++lhs; } if (rhsNeg) { // Sign bit is set so perform two's complement to make it positive rhs.flipAllBits(); - rhs++; + ++rhs; } // Now we have unsigned values to compare so do the comparison if necessary @@ -2116,7 +2116,7 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { } // If its negative, put it in two's complement form if (isNeg) { - (*this)--; + --(*this); this->flipAllBits(); } } @@ -2197,7 +2197,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, // Flip the bits and add one to turn it into the equivalent positive // value and put a '-' in the result. Tmp.flipAllBits(); - Tmp++; + ++Tmp; Str.push_back('-'); } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 5ba69fc3c8..3746a81011 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -83,6 +83,7 @@ add_llvm_library(LLVMSupport Threading.cpp TimeValue.cpp Valgrind.cpp + Watchdog.cpp Unix/Host.inc Unix/Memory.inc Unix/Mutex.inc @@ -95,6 +96,7 @@ add_llvm_library(LLVMSupport Unix/system_error.inc Unix/ThreadLocal.inc Unix/TimeValue.inc + Unix/Watchdog.inc Windows/DynamicLibrary.inc Windows/Host.inc Windows/Memory.inc @@ -108,4 +110,5 @@ add_llvm_library(LLVMSupport Windows/system_error.inc Windows/ThreadLocal.inc Windows/TimeValue.inc + Windows/Watchdog.inc ) diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index d4382e54e0..f4b591e777 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -49,21 +49,21 @@ void llvm::remove_fatal_error_handler() { ErrorHandler = 0; } -void llvm::report_fatal_error(const char *Reason) { - report_fatal_error(Twine(Reason)); +void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) { + report_fatal_error(Twine(Reason), GenCrashDiag); } -void llvm::report_fatal_error(const std::string &Reason) { - report_fatal_error(Twine(Reason)); +void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) { + report_fatal_error(Twine(Reason), GenCrashDiag); } -void llvm::report_fatal_error(StringRef Reason) { - report_fatal_error(Twine(Reason)); +void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) { + report_fatal_error(Twine(Reason), GenCrashDiag); } -void llvm::report_fatal_error(const Twine &Reason) { +void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { if (ErrorHandler) { - ErrorHandler(ErrorHandlerUserData, Reason.str()); + ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag); } else { // Blast the result out to stderr. We don't try hard to make sure this // succeeds (e.g. handling EINTR) and we can't use errs() here because diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index cd430f218b..1ee69b6023 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -70,8 +70,8 @@ error_code FileOutputBuffer::create(StringRef FilePath, if (EC) return EC; - OwningPtr<mapped_file_region> MappedFile( - new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC)); + OwningPtr<mapped_file_region> MappedFile(new mapped_file_region( + FD, true, mapped_file_region::readwrite, Size, 0, EC)); if (EC) return EC; diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 4c558b37cf..7c5ab96a76 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -72,13 +72,17 @@ static void CopyStringRef(char *Memory, StringRef Data) { Memory[Data.size()] = 0; // Null terminate string. } -/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. -template <typename T> -static T *GetNamedBuffer(StringRef Buffer, StringRef Name, - bool RequiresNullTerminator) { - char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); - CopyStringRef(Mem + sizeof(T), Name); - return new (Mem) T(Buffer, RequiresNullTerminator); +namespace { +struct NamedBufferAlloc { + StringRef Name; + NamedBufferAlloc(StringRef Name) : Name(Name) {} +}; +} + +void *operator new(size_t N, const NamedBufferAlloc &Alloc) { + char *Mem = static_cast<char *>(operator new(N + Alloc.Name.size() + 1)); + CopyStringRef(Mem + N, Alloc.Name); + return Mem; } namespace { @@ -105,8 +109,8 @@ public: MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName, bool RequiresNullTerminator) { - return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName, - RequiresNullTerminator); + return new (NamedBufferAlloc(BufferName)) + MemoryBufferMem(InputData, RequiresNullTerminator); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, @@ -183,24 +187,38 @@ error_code MemoryBuffer::getFileOrSTDIN(const char *Filename, //===----------------------------------------------------------------------===// namespace { -/// MemoryBufferMMapFile - This represents a file that was mapped in with the -/// sys::Path::MapInFilePages method. When destroyed, it calls the -/// sys::Path::UnMapFilePages method. -class MemoryBufferMMapFile : public MemoryBufferMem { -public: - MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator) - : MemoryBufferMem(Buffer, RequiresNullTerminator) { } +/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region. +/// +/// This handles converting the offset into a legal offset on the platform. +class MemoryBufferMMapFile : public MemoryBuffer { + sys::fs::mapped_file_region MFR; + + static uint64_t getLegalMapOffset(uint64_t Offset) { + return Offset & ~(sys::fs::mapped_file_region::alignment() - 1); + } + + static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) { + return Len + (Offset - getLegalMapOffset(Offset)); + } - ~MemoryBufferMMapFile() { - static int PageSize = sys::process::get_self()->page_size(); + const char *getStart(uint64_t Len, uint64_t Offset) { + return MFR.const_data() + (Offset - getLegalMapOffset(Offset)); + } - uintptr_t Start = reinterpret_cast<uintptr_t>(getBufferStart()); - size_t Size = getBufferSize(); - uintptr_t RealStart = Start & ~(PageSize - 1); - size_t RealSize = Size + (Start - RealStart); +public: + MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len, + uint64_t Offset, error_code EC) + : MFR(FD, false, sys::fs::mapped_file_region::readonly, + getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) { + if (!EC) { + const char *Start = getStart(Len, Offset); + init(Start, Start + Len, RequiresNullTerminator); + } + } - sys::Path::UnMapFilePages(reinterpret_cast<const char*>(RealStart), - RealSize); + virtual const char *getBufferIdentifier() const LLVM_OVERRIDE { + // The name is stored after the class itself. + return reinterpret_cast<const char *>(this + 1); } virtual BufferKind getBufferKind() const LLVM_OVERRIDE { @@ -344,17 +362,11 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, PageSize)) { - off_t RealMapOffset = Offset & ~(PageSize - 1); - off_t Delta = Offset - RealMapOffset; - size_t RealMapSize = MapSize + Delta; - - if (const char *Pages = sys::Path::MapInFilePages(FD, - RealMapSize, - RealMapOffset)) { - result.reset(GetNamedBuffer<MemoryBufferMMapFile>( - StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); + error_code EC; + result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( + RequiresNullTerminator, FD, MapSize, Offset, EC)); + if (!EC) return error_code::success(); - } } MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp index 41add96194..58a6ea720e 100644 --- a/lib/Support/PathV2.cpp +++ b/lib/Support/PathV2.cpp @@ -18,6 +18,9 @@ #include <cctype> #include <cstdio> #include <cstring> +#ifdef __APPLE__ +#include <unistd.h> +#endif namespace { using llvm::StringRef; @@ -493,6 +496,27 @@ bool is_separator(char value) { void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) { result.clear(); +#ifdef __APPLE__ + // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR. + int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR + : _CS_DARWIN_USER_CACHE_DIR; + size_t ConfLen = confstr(ConfName, 0, 0); + if (ConfLen > 0) { + do { + result.resize(ConfLen); + ConfLen = confstr(ConfName, result.data(), result.size()); + } while (ConfLen > 0 && ConfLen != result.size()); + + if (ConfLen > 0) { + assert(result.back() == 0); + result.pop_back(); + return; + } + + result.clear(); + } +#endif + // Check whether the temporary directory is specified by an environment // variable. const char *EnvironmentVariable; diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index 21d56adb5e..23ee5ab105 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -17,6 +17,7 @@ #include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Signals.h" #include "llvm/Support/ThreadLocal.h" +#include "llvm/Support/Watchdog.h" #include "llvm/Support/raw_ostream.h" #ifdef HAVE_CRASHREPORTERCLIENT_H @@ -37,7 +38,10 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ if (Entry->getNextEntry()) NextID = PrintStack(Entry->getNextEntry(), OS); OS << NextID << ".\t"; - Entry->print(OS); + { + sys::Watchdog W(5); + Entry->print(OS); + } return NextID+1; } diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index 75bc282d9b..201d5c0d30 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -29,12 +29,15 @@ Program::ExecuteAndWait(const Path& path, const Path** redirects, unsigned secondsToWait, unsigned memoryLimit, - std::string* ErrMsg) { + std::string* ErrMsg, + bool *ExecutionFailed) { Program prg; - if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) + if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) { + if (ExecutionFailed) *ExecutionFailed = false; return prg.Wait(path, secondsToWait, ErrMsg); - else - return -1; + } + if (ExecutionFailed) *ExecutionFailed = true; + return -1; } void diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 3b53e9ff49..f0fed7792c 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -29,13 +29,9 @@ void SmallPtrSetImpl::shrink_and_clear() { NumElements = NumTombstones = 0; // Install the new array. Clear all the buckets to empty. - CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1)); + CurArray = (const void**)malloc(sizeof(void*) * CurArraySize); assert(CurArray && "Failed to allocate memory?"); memset(CurArray, -1, CurArraySize*sizeof(void*)); - - // The end pointer, always valid, is set to a valid element to help the - // iterator. - CurArray[CurArraySize] = 0; } bool SmallPtrSetImpl::insert_imp(const void * Ptr) { @@ -139,15 +135,11 @@ void SmallPtrSetImpl::Grow(unsigned NewSize) { bool WasSmall = isSmall(); // Install the new array. Clear all the buckets to empty. - CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1)); + CurArray = (const void**)malloc(sizeof(void*) * NewSize); assert(CurArray && "Failed to allocate memory?"); CurArraySize = NewSize; memset(CurArray, -1, NewSize*sizeof(void*)); - // The end pointer, always valid, is set to a valid element to help the - // iterator. - CurArray[NewSize] = 0; - // Copy over all the elements. if (WasSmall) { // Small sets store their elements in order. @@ -180,7 +172,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage, CurArray = SmallArray; // Otherwise, allocate new heap space (unless we were the same size) } else { - CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1)); + CurArray = (const void**)malloc(sizeof(void*) * that.CurArraySize); assert(CurArray && "Failed to allocate memory?"); } @@ -188,7 +180,7 @@ SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage, CurArraySize = that.CurArraySize; // Copy over the contents from the other set - memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1)); + memcpy(CurArray, that.CurArray, sizeof(void*)*CurArraySize); NumElements = that.NumElements; NumTombstones = that.NumTombstones; @@ -200,7 +192,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { if (isSmall() && RHS.isSmall()) assert(CurArraySize == RHS.CurArraySize && "Cannot assign sets with different small sizes"); - + // If we're becoming small, prepare to insert into our stack space if (RHS.isSmall()) { if (!isSmall()) @@ -209,9 +201,9 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { // Otherwise, allocate new heap space (unless we were the same size) } else if (CurArraySize != RHS.CurArraySize) { if (isSmall()) - CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1)); + CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize); else - CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1)); + CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize); assert(CurArray && "Failed to allocate memory?"); } @@ -219,7 +211,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { CurArraySize = RHS.CurArraySize; // Copy over the contents from the other set - memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1)); + memcpy(CurArray, RHS.CurArray, sizeof(void*)*CurArraySize); NumElements = RHS.NumElements; NumTombstones = RHS.NumTombstones; diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index e00394ec6a..e9b26bdb80 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -332,7 +332,16 @@ void Memory::InvalidateInstructionCache(const void *Addr, __clear_cache(const_cast<char *>(Start), const_cast<char *>(End)); # elif defined(__mips__) const char *Start = static_cast<const char *>(Addr); +# if defined(ANDROID) + // The declaration of "cacheflush" in Android bionic: + // extern int cacheflush(long start, long end, long flags); + const char *End = Start + Len; + long LStart = reinterpret_cast<long>(const_cast<char *>(Start)); + long LEnd = reinterpret_cast<long>(const_cast<char *>(End)); + cacheflush(LStart, LEnd, BCACHE); +# else cacheflush(const_cast<char *>(Start), Len, BCACHE); +# endif # endif #endif // end apple diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 44b31b3202..a3dfd4b0a3 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -475,12 +475,14 @@ rety_open_create: return error_code::success(); } -error_code mapped_file_region::init(int fd, uint64_t offset) { - AutoFD FD(fd); +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { + AutoFD ScopedFD(FD); + if (!CloseFD) + ScopedFD.take(); // Figure out how large the file is. struct stat FileInfo; - if (fstat(fd, &FileInfo) == -1) + if (fstat(FD, &FileInfo) == -1) return error_code(errno, system_category()); uint64_t FileSize = FileInfo.st_size; @@ -488,7 +490,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { Size = FileSize; else if (FileSize < Size) { // We need to grow the file. - if (ftruncate(fd, Size) == -1) + if (ftruncate(FD, Size) == -1) return error_code(errno, system_category()); } @@ -497,7 +499,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { #ifdef MAP_FILE flags |= MAP_FILE; #endif - Mapping = ::mmap(0, Size, prot, flags, fd, offset); + Mapping = ::mmap(0, Size, prot, flags, FD, Offset); if (Mapping == MAP_FAILED) return error_code(errno, system_category()); return error_code::success(); @@ -526,12 +528,13 @@ mapped_file_region::mapped_file_region(const Twine &path, return; } - ec = init(ofd, offset); + ec = init(ofd, true, offset); if (ec) Mapping = 0; } mapped_file_region::mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, @@ -545,7 +548,7 @@ mapped_file_region::mapped_file_region(int fd, return; } - ec = init(fd, offset); + ec = init(fd, closefd, offset); if (ec) Mapping = 0; } diff --git a/lib/Support/Unix/Watchdog.inc b/lib/Support/Unix/Watchdog.inc new file mode 100644 index 0000000000..5d89c0e51b --- /dev/null +++ b/lib/Support/Unix/Watchdog.inc @@ -0,0 +1,32 @@ +//===--- Unix/Watchdog.inc - Unix Watchdog Implementation -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the generic Unix implementation of the Watchdog class. +// +//===----------------------------------------------------------------------===// + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +namespace llvm { + namespace sys { + Watchdog::Watchdog(unsigned int seconds) { +#ifdef HAVE_UNISTD_H + alarm(seconds); +#endif + } + + Watchdog::~Watchdog() { +#ifdef HAVE_UNISTD_H + alarm(0); +#endif + } + } +} diff --git a/lib/Support/Watchdog.cpp b/lib/Support/Watchdog.cpp new file mode 100644 index 0000000000..724aa001f1 --- /dev/null +++ b/lib/Support/Watchdog.cpp @@ -0,0 +1,23 @@ +//===---- Watchdog.cpp - Implement Watchdog ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Watchdog class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Watchdog.h" +#include "llvm/Config/config.h" + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Watchdog.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/Watchdog.inc" +#endif diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 2e6cc96e7f..23f3d14f91 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -593,6 +593,10 @@ retry_random_path: random_path_utf16.push_back(0); random_path_utf16.pop_back(); + // Make sure we don't fall into an infinite loop by constantly trying + // to create the parent path. + bool TriedToCreateParent = false; + // Try to create + open the path. retry_create_file: HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(), @@ -610,7 +614,9 @@ retry_create_file: if (ec == windows_error::file_exists) goto retry_random_path; // Check for non-existing parent directories. - if (ec == windows_error::path_not_found) { + if (ec == windows_error::path_not_found && !TriedToCreateParent) { + TriedToCreateParent = true; + // Create the directories using result_path as temp storage. if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), random_path_utf16.size(), result_path)) @@ -705,13 +711,14 @@ error_code get_magic(const Twine &path, uint32_t len, return error_code::success(); } -error_code mapped_file_region::init(int FD, uint64_t Offset) { +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { FileDescriptor = FD; // Make sure that the requested size fits within SIZE_T. if (Size > std::numeric_limits<SIZE_T>::max()) { - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return make_error_code(errc::invalid_argument); } @@ -732,9 +739,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { 0); if (FileMappingHandle == NULL) { error_code ec = windows_error(GetLastError()); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } @@ -754,9 +762,10 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { if (Mapping == NULL) { error_code ec = windows_error(GetLastError()); ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } @@ -768,14 +777,24 @@ error_code mapped_file_region::init(int FD, uint64_t Offset) { error_code ec = windows_error(GetLastError()); ::UnmapViewOfFile(Mapping); ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else ::CloseHandle(FileHandle); return ec; } Size = mbi.RegionSize; } + + // Close all the handles except for the view. It will keep the other handles + // alive. + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); // Also closes FileHandle. + } else + ::CloseHandle(FileHandle); return error_code::success(); } @@ -815,7 +834,7 @@ mapped_file_region::mapped_file_region(const Twine &path, } FileDescriptor = 0; - ec = init(FileDescriptor, offset); + ec = init(FileDescriptor, true, offset); if (ec) { Mapping = FileMappingHandle = 0; FileHandle = INVALID_HANDLE_VALUE; @@ -824,6 +843,7 @@ mapped_file_region::mapped_file_region(const Twine &path, } mapped_file_region::mapped_file_region(int fd, + bool closefd, mapmode mode, uint64_t length, uint64_t offset, @@ -836,13 +856,14 @@ mapped_file_region::mapped_file_region(int fd, , FileMappingHandle() { FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(fd)); if (FileHandle == INVALID_HANDLE_VALUE) { - _close(FileDescriptor); + if (closefd) + _close(FileDescriptor); FileDescriptor = 0; ec = make_error_code(errc::bad_file_descriptor); return; } - ec = init(FileDescriptor, offset); + ec = init(FileDescriptor, closefd, offset); if (ec) { Mapping = FileMappingHandle = 0; FileHandle = INVALID_HANDLE_VALUE; @@ -853,12 +874,6 @@ mapped_file_region::mapped_file_region(int fd, mapped_file_region::~mapped_file_region() { if (Mapping) ::UnmapViewOfFile(Mapping); - if (FileMappingHandle) - ::CloseHandle(FileMappingHandle); - if (FileDescriptor) - _close(FileDescriptor); - else if (FileHandle != INVALID_HANDLE_VALUE) - ::CloseHandle(FileHandle); } #if LLVM_HAS_RVALUE_REFERENCES diff --git a/lib/Support/Windows/Watchdog.inc b/lib/Support/Windows/Watchdog.inc new file mode 100644 index 0000000000..fab2bdf2a9 --- /dev/null +++ b/lib/Support/Windows/Watchdog.inc @@ -0,0 +1,24 @@ +//===--- Windows/Watchdog.inc - Windows Watchdog Implementation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the generic Windows implementation of the Watchdog class. +// +//===----------------------------------------------------------------------===// + +// TODO: implement. +// Currently this is only used by PrettyStackTrace which is also unimplemented +// on Windows. Roughly, a Windows implementation would use CreateWaitableTimer +// and a second thread to run the TimerAPCProc. + +namespace llvm { + namespace sys { + Watchdog::Watchdog(unsigned int seconds) {} + Watchdog::~Watchdog() {} + } +} diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index f71abd3b24..a433088b19 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -306,7 +306,12 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { if (LLVM_UNLIKELY(OutBufCur == OutBufStart)) { size_t BytesToWrite = Size - (Size % NumBytes); write_impl(Ptr, BytesToWrite); - copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); + size_t BytesRemaining = Size - BytesToWrite; + if (BytesRemaining > size_t(OutBufEnd - OutBufCur)) { + // Too much left over to copy into our buffer. + return write(Ptr + BytesToWrite, BytesRemaining); + } + copy_to_buffer(Ptr + BytesToWrite, BytesRemaining); return *this; } @@ -512,7 +517,7 @@ raw_fd_ostream::~raw_fd_ostream() { // has_error() and clear the error flag with clear_error() before // destructing raw_ostream objects which may have errors. if (has_error()) - report_fatal_error("IO failure on output stream."); + report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false); } diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp index ec84a72454..928b1203cd 100644 --- a/lib/TableGen/Error.cpp +++ b/lib/TableGen/Error.cpp @@ -20,9 +20,15 @@ namespace llvm { SourceMgr SrcMgr; +unsigned ErrorsPrinted = 0; static void PrintMessage(ArrayRef<SMLoc> Loc, SourceMgr::DiagKind Kind, const Twine &Msg) { + // Count the total number of errors printed. + // This is used to exit with an error code if there were any errors. + if (Kind == SourceMgr::DK_Error) + ++ErrorsPrinted; + SMLoc NullLoc; if (Loc.empty()) Loc = NullLoc; diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index e1cd623783..dc4167b305 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -117,11 +117,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) { if (MainFn(Out.os(), Records)) return 1; + if (ErrorsPrinted > 0) { + errs() << argv0 << ": " << ErrorsPrinted << " errors.\n"; + return 1; + } + // Declare success. Out.keep(); return 0; - - return 1; } } diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index c4b48fe5e8..86ad2a6e3c 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -1547,29 +1547,39 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { /// ParseDagArgList - Parse the argument list for a dag literal expression. /// -/// ParseDagArgList ::= Value (':' VARNAME)? -/// ParseDagArgList ::= ParseDagArgList ',' Value (':' VARNAME)? +/// DagArg ::= Value (':' VARNAME)? +/// DagArg ::= VARNAME +/// DagArgList ::= DagArg +/// DagArgList ::= DagArgList ',' DagArg std::vector<std::pair<llvm::Init*, std::string> > TGParser::ParseDagArgList(Record *CurRec) { std::vector<std::pair<llvm::Init*, std::string> > Result; while (1) { - Init *Val = ParseValue(CurRec); - if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >(); - - // If the variable name is present, add it. - std::string VarName; - if (Lex.getCode() == tgtok::colon) { - if (Lex.Lex() != tgtok::VarName) { // eat the ':' - TokError("expected variable name in dag literal"); + // DagArg ::= VARNAME + if (Lex.getCode() == tgtok::VarName) { + // A missing value is treated like '?'. + Result.push_back(std::make_pair(UnsetInit::get(), Lex.getCurStrVal())); + Lex.Lex(); + } else { + // DagArg ::= Value (':' VARNAME)? + Init *Val = ParseValue(CurRec); + if (Val == 0) return std::vector<std::pair<llvm::Init*, std::string> >(); - } - VarName = Lex.getCurStrVal(); - Lex.Lex(); // eat the VarName. - } - Result.push_back(std::make_pair(Val, VarName)); + // If the variable name is present, add it. + std::string VarName; + if (Lex.getCode() == tgtok::colon) { + if (Lex.Lex() != tgtok::VarName) { // eat the ':' + TokError("expected variable name in dag literal"); + return std::vector<std::pair<llvm::Init*, std::string> >(); + } + VarName = Lex.getCurStrVal(); + Lex.Lex(); // eat the VarName. + } + Result.push_back(std::make_pair(Val, VarName)); + } if (Lex.getCode() != tgtok::comma) break; Lex.Lex(); // eat the ',' } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index cca6d12e16..dc41f2f605 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -349,59 +349,6 @@ AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, return TopOfFrameOffset - FrameRegPos; } -/// Estimate and return the size of the frame. -static unsigned estimateStackSize(MachineFunction &MF) { - // FIXME: Make generic? Really consider after upstreaming. This code is now - // shared between PEI, ARM *and* here. - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const { @@ -422,7 +369,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // callee-save register for this purpose or allocate an extra spill slot. bool BigStack = - (RS && estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) + (RS && MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)) || MFI->hasVarSizedObjects() // Access will be from X29: messes things up || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); @@ -449,7 +396,7 @@ AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // helpfully places it near either SP or FP for us to avoid // infinitely-regression during scavenging. const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 7b93463244..cf3a2c3707 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -618,11 +618,11 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB, int64_t NumBytes, MachineInstr::MIFlag MIFlags) { if (NumBytes == 0 && DstReg == SrcReg) return; - else if (abs(NumBytes) & ~0xffffff) { + else if (abs64(NumBytes) & ~0xffffff) { // Generically, we have to materialize the offset into a temporary register // and subtract it. There are a couple of ways this could be done, for now // we'll use a movz/movk or movn/movk sequence. - uint64_t Bits = static_cast<uint64_t>(abs(NumBytes)); + uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes)); BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg) .addImm(0xffff & Bits).addImm(0) .setMIFlags(MIFlags); @@ -673,7 +673,7 @@ void llvm::emitRegUpdate(MachineBasicBlock &MBB, } else { LowOp = AArch64::SUBxxi_lsl0_s; HighOp = AArch64::SUBxxi_lsl12_s; - NumBytes = abs(NumBytes); + NumBytes = abs64(NumBytes); } // If we're here, at the very least a move needs to be produced, which just diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index c1695dacb4..69bb80a485 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -160,44 +160,53 @@ private: SMLoc StartLoc, EndLoc; + struct ImmWithLSLOp { + const MCExpr *Val; + unsigned ShiftAmount; + bool ImplicitAmount; + }; + + struct CondCodeOp { + A64CC::CondCodes Code; + }; + + struct FPImmOp { + double Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ShiftExtendOp { + A64SE::ShiftExtSpecifiers ShiftType; + unsigned Amount; + bool ImplicitAmount; + }; + + struct SysRegOp { + const char *Data; + unsigned Length; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + union { - struct { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; - } ImmWithLSL; - - struct { - A64CC::CondCodes Code; - } CondCode; - - struct { - double Val; - } FPImm; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned RegNum; - } Reg; - - struct { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; - } ShiftExtend; - - struct { - const char *Data; - unsigned Length; - } SysReg; - - struct { - const char *Data; - unsigned Length; - } Tok; + struct ImmWithLSLOp ImmWithLSL; + struct CondCodeOp CondCode; + struct FPImmOp FPImm; + struct ImmOp Imm; + struct RegOp Reg; + struct ShiftExtendOp ShiftExtend; + struct SysRegOp SysReg; + struct TokOp Tok; }; AArch64Operand(KindTy K, SMLoc S, SMLoc E) diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index ab9bba1836..c6690a96c7 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -194,7 +194,17 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { {"rvbar_el3", RVBAR_EL3}, {"isr_el1", ISR_EL1}, {"cntpct_el0", CNTPCT_EL0}, - {"cntvct_el0", CNTVCT_EL0} + {"cntvct_el0", CNTVCT_EL0}, + + // GICv3 registers + {"icc_iar1_el1", ICC_IAR1_EL1}, + {"icc_iar0_el1", ICC_IAR0_EL1}, + {"icc_hppir1_el1", ICC_HPPIR1_EL1}, + {"icc_hppir0_el1", ICC_HPPIR0_EL1}, + {"icc_rpr_el1", ICC_RPR_EL1}, + {"ich_vtr_el2", ICH_VTR_EL2}, + {"ich_eisr_el2", ICH_EISR_EL2}, + {"ich_elsr_el2", ICH_ELSR_EL2} }; A64SysReg::MRSMapper::MRSMapper() { @@ -205,7 +215,15 @@ A64SysReg::MRSMapper::MRSMapper() { const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, - {"pmswinc_el0", PMSWINC_EL0} + {"pmswinc_el0", PMSWINC_EL0}, + + // GICv3 registers + {"icc_eoir1_el1", ICC_EOIR1_EL1}, + {"icc_eoir0_el1", ICC_EOIR0_EL1}, + {"icc_dir_el1", ICC_DIR_EL1}, + {"icc_sgi1r_el1", ICC_SGI1R_EL1}, + {"icc_asgi1r_el1", ICC_ASGI1R_EL1}, + {"icc_sgi0r_el1", ICC_SGI0R_EL1} }; A64SysReg::MSRMapper::MSRMapper() { @@ -467,6 +485,56 @@ const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { {"pmevtyper28_el0", PMEVTYPER28_EL0}, {"pmevtyper29_el0", PMEVTYPER29_EL0}, {"pmevtyper30_el0", PMEVTYPER30_EL0}, + + // GICv3 registers + {"icc_bpr1_el1", ICC_BPR1_EL1}, + {"icc_bpr0_el1", ICC_BPR0_EL1}, + {"icc_pmr_el1", ICC_PMR_EL1}, + {"icc_ctlr_el1", ICC_CTLR_EL1}, + {"icc_ctlr_el3", ICC_CTLR_EL3}, + {"icc_sre_el1", ICC_SRE_EL1}, + {"icc_sre_el2", ICC_SRE_EL2}, + {"icc_sre_el3", ICC_SRE_EL3}, + {"icc_igrpen0_el1", ICC_IGRPEN0_EL1}, + {"icc_igrpen1_el1", ICC_IGRPEN1_EL1}, + {"icc_igrpen1_el3", ICC_IGRPEN1_EL3}, + {"icc_seien_el1", ICC_SEIEN_EL1}, + {"icc_ap0r0_el1", ICC_AP0R0_EL1}, + {"icc_ap0r1_el1", ICC_AP0R1_EL1}, + {"icc_ap0r2_el1", ICC_AP0R2_EL1}, + {"icc_ap0r3_el1", ICC_AP0R3_EL1}, + {"icc_ap1r0_el1", ICC_AP1R0_EL1}, + {"icc_ap1r1_el1", ICC_AP1R1_EL1}, + {"icc_ap1r2_el1", ICC_AP1R2_EL1}, + {"icc_ap1r3_el1", ICC_AP1R3_EL1}, + {"ich_ap0r0_el2", ICH_AP0R0_EL2}, + {"ich_ap0r1_el2", ICH_AP0R1_EL2}, + {"ich_ap0r2_el2", ICH_AP0R2_EL2}, + {"ich_ap0r3_el2", ICH_AP0R3_EL2}, + {"ich_ap1r0_el2", ICH_AP1R0_EL2}, + {"ich_ap1r1_el2", ICH_AP1R1_EL2}, + {"ich_ap1r2_el2", ICH_AP1R2_EL2}, + {"ich_ap1r3_el2", ICH_AP1R3_EL2}, + {"ich_hcr_el2", ICH_HCR_EL2}, + {"ich_misr_el2", ICH_MISR_EL2}, + {"ich_vmcr_el2", ICH_VMCR_EL2}, + {"ich_vseir_el2", ICH_VSEIR_EL2}, + {"ich_lr0_el2", ICH_LR0_EL2}, + {"ich_lr1_el2", ICH_LR1_EL2}, + {"ich_lr2_el2", ICH_LR2_EL2}, + {"ich_lr3_el2", ICH_LR3_EL2}, + {"ich_lr4_el2", ICH_LR4_EL2}, + {"ich_lr5_el2", ICH_LR5_EL2}, + {"ich_lr6_el2", ICH_LR6_EL2}, + {"ich_lr7_el2", ICH_LR7_EL2}, + {"ich_lr8_el2", ICH_LR8_EL2}, + {"ich_lr9_el2", ICH_LR9_EL2}, + {"ich_lr10_el2", ICH_LR10_EL2}, + {"ich_lr11_el2", ICH_LR11_EL2}, + {"ich_lr12_el2", ICH_LR12_EL2}, + {"ich_lr13_el2", ICH_LR13_EL2}, + {"ich_lr14_el2", ICH_LR14_EL2}, + {"ich_lr15_el2", ICH_LR15_EL2} }; uint32_t diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 5eebf44431..c9b6e23de3 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -354,13 +354,31 @@ namespace A64SysReg { RVBAR_EL3 = 0xf601, // 11 110 1100 0000 001 ISR_EL1 = 0xc608, // 11 000 1100 0001 000 CNTPCT_EL0 = 0xdf01, // 11 011 1110 0000 001 - CNTVCT_EL0 = 0xdf02 // 11 011 1110 0000 010 + CNTVCT_EL0 = 0xdf02, // 11 011 1110 0000 010 + + // GICv3 registers + ICC_IAR1_EL1 = 0xc660, // 11 000 1100 1100 000 + ICC_IAR0_EL1 = 0xc640, // 11 000 1100 1000 000 + ICC_HPPIR1_EL1 = 0xc662, // 11 000 1100 1100 010 + ICC_HPPIR0_EL1 = 0xc642, // 11 000 1100 1000 010 + ICC_RPR_EL1 = 0xc65b, // 11 000 1100 1011 011 + ICH_VTR_EL2 = 0xe659, // 11 100 1100 1011 001 + ICH_EISR_EL2 = 0xe65b, // 11 100 1100 1011 011 + ICH_ELSR_EL2 = 0xe65d // 11 100 1100 1011 101 }; enum SysRegWOValues { DBGDTRTX_EL0 = 0x9828, // 10 011 0000 0101 000 OSLAR_EL1 = 0x8084, // 10 000 0001 0000 100 - PMSWINC_EL0 = 0xdce4 // 11 011 1001 1100 100 + PMSWINC_EL0 = 0xdce4, // 11 011 1001 1100 100 + + // GICv3 registers + ICC_EOIR1_EL1 = 0xc661, // 11 000 1100 1100 001 + ICC_EOIR0_EL1 = 0xc641, // 11 000 1100 1000 001 + ICC_DIR_EL1 = 0xc659, // 11 000 1100 1011 001 + ICC_SGI1R_EL1 = 0xc65d, // 11 000 1100 1011 101 + ICC_ASGI1R_EL1 = 0xc65e, // 11 000 1100 1011 110 + ICC_SGI0R_EL1 = 0xc65f // 11 000 1100 1011 111 }; enum SysRegValues { @@ -616,7 +634,57 @@ namespace A64SysReg { PMEVTYPER27_EL0 = 0xdf7b, // 11 011 1110 1111 011 PMEVTYPER28_EL0 = 0xdf7c, // 11 011 1110 1111 100 PMEVTYPER29_EL0 = 0xdf7d, // 11 011 1110 1111 101 - PMEVTYPER30_EL0 = 0xdf7e // 11 011 1110 1111 110 + PMEVTYPER30_EL0 = 0xdf7e, // 11 011 1110 1111 110 + + // GICv3 registers + ICC_BPR1_EL1 = 0xc663, // 11 000 1100 1100 011 + ICC_BPR0_EL1 = 0xc643, // 11 000 1100 1000 011 + ICC_PMR_EL1 = 0xc230, // 11 000 0100 0110 000 + ICC_CTLR_EL1 = 0xc664, // 11 000 1100 1100 100 + ICC_CTLR_EL3 = 0xf664, // 11 110 1100 1100 100 + ICC_SRE_EL1 = 0xc665, // 11 000 1100 1100 101 + ICC_SRE_EL2 = 0xe64d, // 11 100 1100 1001 101 + ICC_SRE_EL3 = 0xf665, // 11 110 1100 1100 101 + ICC_IGRPEN0_EL1 = 0xc666, // 11 000 1100 1100 110 + ICC_IGRPEN1_EL1 = 0xc667, // 11 000 1100 1100 111 + ICC_IGRPEN1_EL3 = 0xf667, // 11 110 1100 1100 111 + ICC_SEIEN_EL1 = 0xc668, // 11 000 1100 1101 000 + ICC_AP0R0_EL1 = 0xc644, // 11 000 1100 1000 100 + ICC_AP0R1_EL1 = 0xc645, // 11 000 1100 1000 101 + ICC_AP0R2_EL1 = 0xc646, // 11 000 1100 1000 110 + ICC_AP0R3_EL1 = 0xc647, // 11 000 1100 1000 111 + ICC_AP1R0_EL1 = 0xc648, // 11 000 1100 1001 000 + ICC_AP1R1_EL1 = 0xc649, // 11 000 1100 1001 001 + ICC_AP1R2_EL1 = 0xc64a, // 11 000 1100 1001 010 + ICC_AP1R3_EL1 = 0xc64b, // 11 000 1100 1001 011 + ICH_AP0R0_EL2 = 0xe640, // 11 100 1100 1000 000 + ICH_AP0R1_EL2 = 0xe641, // 11 100 1100 1000 001 + ICH_AP0R2_EL2 = 0xe642, // 11 100 1100 1000 010 + ICH_AP0R3_EL2 = 0xe643, // 11 100 1100 1000 011 + ICH_AP1R0_EL2 = 0xe648, // 11 100 1100 1001 000 + ICH_AP1R1_EL2 = 0xe649, // 11 100 1100 1001 001 + ICH_AP1R2_EL2 = 0xe64a, // 11 100 1100 1001 010 + ICH_AP1R3_EL2 = 0xe64b, // 11 100 1100 1001 011 + ICH_HCR_EL2 = 0xe658, // 11 100 1100 1011 000 + ICH_MISR_EL2 = 0xe65a, // 11 100 1100 1011 010 + ICH_VMCR_EL2 = 0xe65f, // 11 100 1100 1011 111 + ICH_VSEIR_EL2 = 0xe64c, // 11 100 1100 1001 100 + ICH_LR0_EL2 = 0xe660, // 11 100 1100 1100 000 + ICH_LR1_EL2 = 0xe661, // 11 100 1100 1100 001 + ICH_LR2_EL2 = 0xe662, // 11 100 1100 1100 010 + ICH_LR3_EL2 = 0xe663, // 11 100 1100 1100 011 + ICH_LR4_EL2 = 0xe664, // 11 100 1100 1100 100 + ICH_LR5_EL2 = 0xe665, // 11 100 1100 1100 101 + ICH_LR6_EL2 = 0xe666, // 11 100 1100 1100 110 + ICH_LR7_EL2 = 0xe667, // 11 100 1100 1100 111 + ICH_LR8_EL2 = 0xe668, // 11 100 1100 1101 000 + ICH_LR9_EL2 = 0xe669, // 11 100 1100 1101 001 + ICH_LR10_EL2 = 0xe66a, // 11 100 1100 1101 010 + ICH_LR11_EL2 = 0xe66b, // 11 100 1100 1101 011 + ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100 + ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 + ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 + ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111 }; // Note that these do not inherit from NamedImmMapper. This class is diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp new file mode 100644 index 0000000000..f0d4dbe2bf --- /dev/null +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -0,0 +1,704 @@ +//=== A15SDOptimizerPass.cpp - Optimize DPR and SPR register accesses on A15==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The Cortex-A15 processor employs a tracking scheme in its register renaming +// in order to process each instruction's micro-ops speculatively and +// out-of-order with appropriate forwarding. The ARM architecture allows VFP +// instructions to read and write 32-bit S-registers. Each S-register +// corresponds to one half (upper or lower) of an overlaid 64-bit D-register. +// +// There are several instruction patterns which can be used to provide this +// capability which can provide higher performance than other, potentially more +// direct patterns, specifically around when one micro-op reads a D-register +// operand that has recently been written as one or more S-register results. +// +// This file defines a pre-regalloc pass which looks for SPR producers which +// are going to be used by a DPR (or QPR) consumers and creates the more +// optimized access pattern. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "a15-sd-optimizer" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" +#include "ARMISelLowering.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include <set> + +using namespace llvm; + +namespace { + struct A15SDOptimizer : public MachineFunctionPass { + static char ID; + A15SDOptimizer() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM A15 S->D optimizer"; + } + + private: + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + bool runOnInstruction(MachineInstr *MI); + + // + // Instruction builder helpers + // + unsigned createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, + bool QPR=false); + + unsigned createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC); + + unsigned createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1); + + unsigned createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2); + + unsigned createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert); + + unsigned createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL); + + // + // Various property checkers + // + bool usesRegClass(MachineOperand &MO, const TargetRegisterClass *TRC); + bool hasPartialWrite(MachineInstr *MI); + SmallVector<unsigned, 8> getReadDPRs(MachineInstr *MI); + unsigned getDPRLaneFromSPR(unsigned SReg); + + // + // Methods used for getting the definitions of partial registers + // + + MachineInstr *elideCopies(MachineInstr *MI); + void elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs); + + // + // Pattern optimization methods + // + unsigned optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg); + unsigned optimizeSDPattern(MachineInstr *MI); + unsigned getPrefSPRLane(unsigned SReg); + + // + // Sanitizing method - used to make sure if don't leave dead code around. + // + void eraseInstrWithNoUses(MachineInstr *MI); + + // + // A map used to track the changes done by this pass. + // + std::map<MachineInstr*, unsigned> Replacements; + std::set<MachineInstr *> DeadInstr; + }; + char A15SDOptimizer::ID = 0; +} // end anonymous namespace + +// Returns true if this is a use of a SPR register. +bool A15SDOptimizer::usesRegClass(MachineOperand &MO, + const TargetRegisterClass *TRC) { + if (!MO.isReg()) + return false; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(TRC); + else + return TRC->contains(Reg); +} + +unsigned A15SDOptimizer::getDPRLaneFromSPR(unsigned SReg) { + unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, + &ARM::DPRRegClass); + if (DReg != ARM::NoRegister) return ARM::ssub_1; + return ARM::ssub_0; +} + +// Get the subreg type that is most likely to be coalesced +// for an SPR register that will be used in VDUP32d pseudo. +unsigned A15SDOptimizer::getPrefSPRLane(unsigned SReg) { + if (!TRI->isVirtualRegister(SReg)) + return getDPRLaneFromSPR(SReg); + + MachineInstr *MI = MRI->getVRegDef(SReg); + if (!MI) return ARM::ssub_0; + MachineOperand *MO = MI->findRegisterDefOperand(SReg); + + assert(MO->isReg() && "Non register operand found!"); + if (!MO) return ARM::ssub_0; + + if (MI->isCopy() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + SReg = MI->getOperand(1).getReg(); + } + + if (TargetRegisterInfo::isVirtualRegister(SReg)) { + if (MO->getSubReg() == ARM::ssub_1) return ARM::ssub_1; + return ARM::ssub_0; + } + return getDPRLaneFromSPR(SReg); +} + +// MI is known to be dead. Figure out what instructions +// are also made dead by this and mark them for removal. +void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { + SmallVector<MachineInstr *, 8> Front; + DeadInstr.insert(MI); + + DEBUG(dbgs() << "Deleting base instruction " << *MI << "\n"); + Front.push_back(MI); + + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // MI is already known to be dead. We need to see + // if other instructions can also be removed. + for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if ((!MO.isReg()) || (!MO.isUse())) + continue; + unsigned Reg = MO.getReg(); + if (!TRI->isVirtualRegister(Reg)) + continue; + MachineOperand *Op = MI->findRegisterDefOperand(Reg); + + if (!Op) + continue; + + MachineInstr *Def = Op->getParent(); + + // We don't need to do anything if we have already marked + // this instruction as being dead. + if (DeadInstr.find(Def) != DeadInstr.end()) + continue; + + // Check if all the uses of this instruction are marked as + // dead. If so, we can also mark this instruction as being + // dead. + bool IsDead = true; + for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { + MachineOperand &MODef = Def->getOperand(j); + if ((!MODef.isReg()) || (!MODef.isDef())) + continue; + unsigned DefReg = MODef.getReg(); + if (!TRI->isVirtualRegister(DefReg)) { + IsDead = false; + break; + } + for (MachineRegisterInfo::use_iterator II = MRI->use_begin(Reg), + EE = MRI->use_end(); + II != EE; ++II) { + // We don't care about self references. + if (&*II == Def) + continue; + if (DeadInstr.find(&*II) == DeadInstr.end()) { + IsDead = false; + break; + } + } + } + + if (!IsDead) continue; + + DEBUG(dbgs() << "Deleting instruction " << *Def << "\n"); + DeadInstr.insert(Def); + } + } +} + +// Creates the more optimized patterns and generally does all the code +// transformations in this pass. +unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { + if (MI->isCopy()) { + return optimizeAllLanesPattern(MI, MI->getOperand(1).getReg()); + } + + if (MI->isInsertSubreg()) { + unsigned DPRReg = MI->getOperand(1).getReg(); + unsigned SPRReg = MI->getOperand(2).getReg(); + + if (TRI->isVirtualRegister(DPRReg) && TRI->isVirtualRegister(SPRReg)) { + MachineInstr *DPRMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + MachineInstr *SPRMI = MRI->getVRegDef(MI->getOperand(2).getReg()); + + if (DPRMI && SPRMI) { + // See if the first operand of this insert_subreg is IMPLICIT_DEF + MachineInstr *ECDef = elideCopies(DPRMI); + if (ECDef != 0 && ECDef->isImplicitDef()) { + // Another corner case - if we're inserting something that is purely + // a subreg copy of a DPR, just use that DPR. + + MachineInstr *EC = elideCopies(SPRMI); + // Is it a subreg copy of ssub_0? + if (EC && EC->isCopy() && + EC->getOperand(1).getSubReg() == ARM::ssub_0) { + DEBUG(dbgs() << "Found a subreg copy: " << *SPRMI); + + // Find the thing we're subreg copying out of - is it of the same + // regclass as DPRMI? (i.e. a DPR or QPR). + unsigned FullReg = SPRMI->getOperand(1).getReg(); + const TargetRegisterClass *TRC = + MRI->getRegClass(MI->getOperand(1).getReg()); + if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { + DEBUG(dbgs() << "Subreg copy is compatible - returning "); + DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + eraseInstrWithNoUses(MI); + return FullReg; + } + } + + return optimizeAllLanesPattern(MI, MI->getOperand(2).getReg()); + } + } + } + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), + &ARM::SPRRegClass)) { + // See if all bar one of the operands are IMPLICIT_DEF and insert the + // optimizer pattern accordingly. + unsigned NumImplicit = 0, NumTotal = 0; + unsigned NonImplicitReg = ~0U; + + for (unsigned I = 1; I < MI->getNumExplicitOperands(); ++I) { + if (!MI->getOperand(I).isReg()) + continue; + ++NumTotal; + unsigned OpReg = MI->getOperand(I).getReg(); + + if (!TRI->isVirtualRegister(OpReg)) + break; + + MachineInstr *Def = MRI->getVRegDef(OpReg); + if (!Def) + break; + if (Def->isImplicitDef()) + ++NumImplicit; + else + NonImplicitReg = MI->getOperand(I).getReg(); + } + + if (NumImplicit == NumTotal - 1) + return optimizeAllLanesPattern(MI, NonImplicitReg); + else + return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); + } + + assert(0 && "Unhandled update pattern!"); + return 0; +} + +// Return true if this MachineInstr inserts a scalar (SPR) value into +// a D or Q register. +bool A15SDOptimizer::hasPartialWrite(MachineInstr *MI) { + // The only way we can do a partial register update is through a COPY, + // INSERT_SUBREG or REG_SEQUENCE. + if (MI->isCopy() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + if (MI->isInsertSubreg() && usesRegClass(MI->getOperand(2), + &ARM::SPRRegClass)) + return true; + + if (MI->isRegSequence() && usesRegClass(MI->getOperand(1), &ARM::SPRRegClass)) + return true; + + return false; +} + +// Looks through full copies to get the instruction that defines the input +// operand for MI. +MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { + if (!MI->isFullCopy()) + return MI; + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + return NULL; + MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!Def) + return NULL; + return elideCopies(Def); +} + +// Look through full copies and PHIs to get the set of non-copy MachineInstrs +// that can produce MI. +void A15SDOptimizer::elideCopiesAndPHIs(MachineInstr *MI, + SmallVectorImpl<MachineInstr*> &Outs) { + // Looking through PHIs may create loops so we need to track what + // instructions we have visited before. + std::set<MachineInstr *> Reached; + SmallVector<MachineInstr *, 8> Front; + Front.push_back(MI); + while (Front.size() != 0) { + MI = Front.back(); + Front.pop_back(); + + // If we have already explored this MachineInstr, ignore it. + if (Reached.find(MI) != Reached.end()) + continue; + Reached.insert(MI); + if (MI->isPHI()) { + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + unsigned Reg = MI->getOperand(I).getReg(); + if (!TRI->isVirtualRegister(Reg)) { + continue; + } + MachineInstr *NewMI = MRI->getVRegDef(Reg); + if (!NewMI) + continue; + Front.push_back(NewMI); + } + } else if (MI->isFullCopy()) { + if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) + continue; + MachineInstr *NewMI = MRI->getVRegDef(MI->getOperand(1).getReg()); + if (!NewMI) + continue; + Front.push_back(NewMI); + } else { + DEBUG(dbgs() << "Found partial copy" << *MI <<"\n"); + Outs.push_back(MI); + } + } +} + +// Return the DPR virtual registers that are read by this machine instruction +// (if any). +SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { + if (MI->isCopyLike() || MI->isInsertSubreg() || MI->isRegSequence() || + MI->isKill()) + return SmallVector<unsigned, 8>(); + + SmallVector<unsigned, 8> Defs; + for (unsigned i = 0; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (!MO.isReg() || !MO.isUse()) + continue; + if (!usesRegClass(MO, &ARM::DPRRegClass) && + !usesRegClass(MO, &ARM::QPRRegClass)) + continue; + + Defs.push_back(MO.getReg()); + } + return Defs; +} + +// Creates a DPR register from an SPR one by using a VDUP. +unsigned +A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg, unsigned Lane, bool QPR) { + unsigned Out = MRI->createVirtualRegister(QPR ? &ARM::QPRRegClass : + &ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(QPR ? ARM::VDUPLN32q : ARM::VDUPLN32d), + Out) + .addReg(Reg) + .addImm(Lane)); + + return Out; +} + +// Creates a SPR register from a DPR by copying the value in lane 0. +unsigned +A15SDOptimizer::createExtractSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned DReg, unsigned Lane, + const TargetRegisterClass *TRC) { + unsigned Out = MRI->createVirtualRegister(TRC); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::COPY), Out) + .addReg(DReg, 0, Lane); + + return Out; +} + +// Takes two SPR registers and creates a DPR by using a REG_SEQUENCE. +unsigned +A15SDOptimizer::createRegSequence(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Reg1, unsigned Reg2) { + unsigned Out = MRI->createVirtualRegister(&ARM::QPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::REG_SEQUENCE), Out) + .addReg(Reg1) + .addImm(ARM::dsub_0) + .addReg(Reg2) + .addImm(ARM::dsub_1); + return Out; +} + +// Takes two DPR registers that have previously been VDUPed (Ssub0 and Ssub1) +// and merges them into one DPR register. +unsigned +A15SDOptimizer::createVExt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, + unsigned Ssub0, unsigned Ssub1) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + AddDefaultPred(BuildMI(MBB, + InsertBefore, + DL, + TII->get(ARM::VEXTd32), Out) + .addReg(Ssub0) + .addReg(Ssub1) + .addImm(1)); + return Out; +} + +unsigned +A15SDOptimizer::createInsertSubreg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL, unsigned DReg, unsigned Lane, + unsigned ToInsert) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPR_VFP2RegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::INSERT_SUBREG), Out) + .addReg(DReg) + .addReg(ToInsert) + .addImm(Lane); + + return Out; +} + +unsigned +A15SDOptimizer::createImplicitDef(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + DebugLoc DL) { + unsigned Out = MRI->createVirtualRegister(&ARM::DPRRegClass); + BuildMI(MBB, + InsertBefore, + DL, + TII->get(TargetOpcode::IMPLICIT_DEF), Out); + return Out; +} + +// This function inserts instructions in order to optimize interactions between +// SPR registers and DPR/QPR registers. It does so by performing VDUPs on all +// lanes, and the using VEXT instructions to recompose the result. +unsigned +A15SDOptimizer::optimizeAllLanesPattern(MachineInstr *MI, unsigned Reg) { + MachineBasicBlock::iterator InsertPt(MI); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock &MBB = *MI->getParent(); + InsertPt++; + unsigned Out; + + if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::QPRRegClass)) { + unsigned DSub0 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_0, &ARM::DPRRegClass); + unsigned DSub1 = createExtractSubreg(MBB, InsertPt, DL, Reg, + ARM::dsub_1, &ARM::DPRRegClass); + + unsigned Out1 = createDupLane(MBB, InsertPt, DL, DSub0, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, DSub0, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + unsigned Out3 = createDupLane(MBB, InsertPt, DL, DSub1, 0); + unsigned Out4 = createDupLane(MBB, InsertPt, DL, DSub1, 1); + Out2 = createVExt(MBB, InsertPt, DL, Out3, Out4); + + Out = createRegSequence(MBB, InsertPt, DL, Out, Out2); + + } else if (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::DPRRegClass)) { + unsigned Out1 = createDupLane(MBB, InsertPt, DL, Reg, 0); + unsigned Out2 = createDupLane(MBB, InsertPt, DL, Reg, 1); + Out = createVExt(MBB, InsertPt, DL, Out1, Out2); + + } else { + assert(MRI->getRegClass(Reg)->hasSuperClassEq(&ARM::SPRRegClass) && + "Found unexpected regclass!"); + + unsigned PrefLane = getPrefSPRLane(Reg); + unsigned Lane; + switch (PrefLane) { + case ARM::ssub_0: Lane = 0; break; + case ARM::ssub_1: Lane = 1; break; + default: llvm_unreachable("Unknown preferred lane!"); + } + + bool UsesQPR = usesRegClass(MI->getOperand(0), &ARM::QPRRegClass); + + Out = createImplicitDef(MBB, InsertPt, DL); + Out = createInsertSubreg(MBB, InsertPt, DL, Out, PrefLane, Reg); + Out = createDupLane(MBB, InsertPt, DL, Out, Lane, UsesQPR); + eraseInstrWithNoUses(MI); + } + return Out; +} + +bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { + // We look for instructions that write S registers that are then read as + // D/Q registers. These can only be caused by COPY, INSERT_SUBREG and + // REG_SEQUENCE pseudos that insert an SPR value into a DPR register or + // merge two SPR values to form a DPR register. In order avoid false + // positives we make sure that there is an SPR producer so we look past + // COPY and PHI nodes to find it. + // + // The best code pattern for when an SPR producer is going to be used by a + // DPR or QPR consumer depends on whether the other lanes of the + // corresponding DPR/QPR are currently defined. + // + // We can handle these efficiently, depending on the type of + // pseudo-instruction that is producing the pattern + // + // * COPY: * VDUP all lanes and merge the results together + // using VEXTs. + // + // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR + // lane, and the other lane(s) of the DPR/QPR register + // that we are inserting in are undefined, use the + // original DPR/QPR value. + // * Otherwise, fall back on the same stategy as COPY. + // + // * REG_SEQUENCE: * If all except one of the input operands are + // IMPLICIT_DEFs, insert the VDUP pattern for just the + // defined input operand + // * Otherwise, fall back on the same stategy as COPY. + // + + // First, get all the reads of D-registers done by this instruction. + SmallVector<unsigned, 8> Defs = getReadDPRs(MI); + bool Modified = false; + + for (SmallVector<unsigned, 8>::iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + // Follow the def-use chain for this DPR through COPYs, and also through + // PHIs (which are essentially multi-way COPYs). It is because of PHIs that + // we can end up with multiple defs of this DPR. + + SmallVector<MachineInstr *, 8> DefSrcs; + if (!TRI->isVirtualRegister(*I)) + continue; + MachineInstr *Def = MRI->getVRegDef(*I); + if (!Def) + continue; + + elideCopiesAndPHIs(Def, DefSrcs); + + for (SmallVector<MachineInstr*, 8>::iterator II = DefSrcs.begin(), + EE = DefSrcs.end(); II != EE; ++II) { + MachineInstr *MI = *II; + + // If we've already analyzed and replaced this operand, don't do + // anything. + if (Replacements.find(MI) != Replacements.end()) + continue; + + // Now, work out if the instruction causes a SPR->DPR dependency. + if (!hasPartialWrite(MI)) + continue; + + // Collect all the uses of this MI's DPR def for updating later. + SmallVector<MachineOperand*, 8> Uses; + unsigned DPRDefReg = MI->getOperand(0).getReg(); + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg), + E = MRI->use_end(); I != E; ++I) + Uses.push_back(&I.getOperand()); + + // We can optimize this. + unsigned NewReg = optimizeSDPattern(MI); + + if (NewReg != 0) { + Modified = true; + for (SmallVector<MachineOperand*, 8>::const_iterator I = Uses.begin(), + E = Uses.end(); I != E; ++I) { + DEBUG(dbgs() << "Replacing operand " + << **I << " with " + << PrintReg(NewReg) << "\n"); + (*I)->substVirtReg(NewReg, 0, *TRI); + } + } + Replacements[MI] = NewReg; + } + } + return Modified; +} + +bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { + TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + bool Modified = false; + + DEBUG(dbgs() << "Running on function " << Fn.getName()<< "\n"); + + DeadInstr.clear(); + Replacements.clear(); + + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + + for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); + MI != ME;) { + Modified |= runOnInstruction(MI++); + } + + } + + for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), + E = DeadInstr.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Modified; +} + +FunctionPass *llvm::createA15SDOptimizerPass() { + return new A15SDOptimizer(); +} diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 5faf8c320c..80e5f37eb0 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,6 +35,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE); +FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMGlobalBaseRegPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 46915eecf6..68380847a0 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -143,14 +143,12 @@ include "ARMSchedule.td" // ARM processor families. def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", "Cortex-A5 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureVMLxForwarding, - FeatureT2XtPk]>; + [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", [FeatureVMLxForwarding, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 58c779830e..13ec208793 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1357,7 +1357,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(MCInstBuilder(ARM::MOVr) .addReg(ARM::PC) - .addImm(MI->getOperand(0).getReg()) + .addReg(MI->getOperand(0).getReg()) // Add predicate operands. .addImm(ARMCC::AL) .addReg(0) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ed001ea24a..126f160f6d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1125,7 +1125,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -3734,9 +3734,9 @@ ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const { if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI)) return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON)); - // A9-like cores are particularly picky about mixing the two and want these + // CortexA9 is particularly picky about mixing the two and wants these // converted. - if (Subtarget.isLikeA9() && !isPredicated(MI) && + if (Subtarget.isCortexA9() && !isPredicated(MI) && (MI->getOpcode() == ARM::VMOVRS || MI->getOpcode() == ARM::VMOVSR || MI->getOpcode() == ARM::VMOVS)) @@ -4023,14 +4023,12 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops. // // FCONSTD can be used as a dependency-breaking instruction. - - unsigned ARMBaseInstrInfo:: getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, const TargetRegisterInfo *TRI) const { - // Only Swift has partial register update problems. - if (!SwiftPartialUpdateClearance || !Subtarget.isSwift()) + if (!SwiftPartialUpdateClearance || + !(Subtarget.isSwift() || Subtarget.isCortexA15())) return 0; assert(TRI && "Need TRI instance"); @@ -4056,7 +4054,7 @@ getPartialRegUpdateClearance(const MachineInstr *MI, // Explicitly reads the dependency. case ARM::VLD1LNd32: - UseOp = 1; + UseOp = 3; break; default: return 0; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index abdd251743..b6b27f849a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -680,7 +680,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(TFI->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0ca6450e2b..7a02adf246 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1038,58 +1038,6 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, return FnSize; } -/// estimateStackSize - Estimate and return the size of the frame. -/// FIXME: Make generic? -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - unsigned MaxAlign = MFI->getMaxAlignment(); - int Offset = 0; - - // This code is very, very similar to PEI::calculateFrameObjectOffsets(). - // It really should be refactored to share code. Until then, changes - // should keep in mind that there's tight coupling between the two. - - for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -MFI->getObjectOffset(i); - if (FixedOff > Offset) Offset = FixedOff; - } - for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { - if (MFI->isDeadObjectIndex(i)) - continue; - Offset += MFI->getObjectSize(i); - unsigned Align = MFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset+Align-1)/Align*Align; - - MaxAlign = std::max(Align, MaxAlign); - } - - if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF)) - Offset += MFI->getMaxCallFrameSize(); - - // Round up the size to a multiple of the alignment. If the function has - // any calls or alloca's, align to the target's StackAlignment value to - // ensure that the callee's frame or the alloca data is suitably aligned; - // otherwise, for leaf functions, align to the TransientStackAlignment - // value. - unsigned StackAlign; - if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || - (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0)) - StackAlign = TFI->getStackAlignment(); - else - StackAlign = TFI->getTransientStackAlignment(); - - // If the frame pointer is eliminated, all frame offsets will be relative to - // SP not FP. Align to MaxAlign so this works. - StackAlign = std::max(StackAlign, MaxAlign); - unsigned AlignMask = StackAlign - 1; - Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); - - return (unsigned)Offset; -} - /// estimateRSStackSizeLimit - Look at each instruction that references stack /// frames and return the stack size limit beyond which some of these /// instructions will require a scratch register during their expansion later. @@ -1235,7 +1183,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // we've used all the registers and so R4 is already used, so not marking // it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = estimateStackSize(MF); + unsigned StackSize = MFI->estimateStackSize(MF); if (MFI->hasVarSizedObjects() || StackSize > 508) MRI.setPhysRegUsed(ARM::R4); } @@ -1330,7 +1278,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // worth the effort and added fragility? bool BigStack = (RS && - (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + (MFI->estimateStackSize(MF) + + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= estimateRSStackSizeLimit(MF, this))) || MFI->hasVarSizedObjects() || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); @@ -1419,7 +1368,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = &ARM::GPRRegClass; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 514971f01e..bb26090d2d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -564,6 +564,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); + // Custom expand long extensions to vectors. + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); + setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); + setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); + // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -870,8 +880,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // are at least 4 bytes aligned. setMinStackArgumentAlignment(4); - BenefitFromCodePlacementOpt = true; - // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->isLikeA9(); @@ -3433,6 +3441,47 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), +/// and size(DestVec) > 128-bits. +/// This is achieved by doing the one extension from the SrcVec, splitting the +/// result, extending these parts, and then concatenating these into the +/// destination. +static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DestVT = N->getValueType(0); + + assert(DestVT.getSizeInBits() > 128 && + "Custom sext/zext expansion needs >128-bit vector."); + // If this is a normal length extension, use the default expansion. + if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && + SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) + return SDValue(); + + DebugLoc dl = N->getDebugLoc(); + unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); + unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); + unsigned NumElts = SrcVT.getVectorNumElements(); + LLVMContext &Ctx = *DAG.getContext(); + SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; + + EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts); + EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), + NumElts/2); + EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), + NumElts/2); + + Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); + SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(0)); + SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, + DAG.getIntPtrConstant(NumElts/2)); + ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); + ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -5621,6 +5670,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = ExpandVectorExtension(N, DAG); + break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 9409f35974..c2f357e84f 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1010,7 +1010,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1022,7 +1023,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1037,7 +1039,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1052,7 +1055,8 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]> { + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg_reg:$shift))]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1079,7 +1083,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, let isReMaterializable = 1 in { def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> imm; @@ -1091,7 +1096,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, } def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, iir, opc, "\t$Rd, $Rn, $Rm", - [/* pattern left blank */]> { + [/* pattern left blank */]>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -1105,7 +1111,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, def rsi : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_reg_imm:$shift, GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1120,7 +1127,8 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, def rsr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, iis, opc, "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]> { + [(set GPR:$Rd, (opnode so_reg_reg:$shift, GPR:$Rn))]>, + Sched<[WriteALUsr, ReadALUsr]> { bits<4> Rd; bits<4> Rn; bits<12> shift; @@ -1145,24 +1153,28 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>; + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>, + Sched<[WriteALU, ReadALU]>; def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), 4, iir, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> { + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>, + Sched<[WriteALU, ReadALU, ReadALU]> { let isCommutable = Commutable; } def rsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_imm:$shift))]>; + so_reg_imm:$shift))]>, + Sched<[WriteALUsi, ReadALU]>; def rsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, - so_reg_reg:$shift))]>; + so_reg_reg:$shift))]>, + Sched<[WriteALUSsr, ReadALUsr]>; } } @@ -1174,19 +1186,22 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, bit Commutable = 0> { def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>; + [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>, + Sched<[WriteALU, ReadALU]>; def rsi : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_imm:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, - GPR:$Rn))]>; + GPR:$Rn))]>, + Sched<[WriteALUsi, ReadALU]>; def rsr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg_reg:$shift, pred:$p), 4, iis, [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, - GPR:$Rn))]>; + GPR:$Rn))]>, + Sched<[WriteALUSsr, ReadALUsr]>; } } diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 98bd6c168e..e4e683c2a0 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -865,7 +865,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. - if (isLd && MI->getOperand(0).getReg() == Base) + if (MI->getOperand(0).getReg() == Base) return false; unsigned PredReg = 0; diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 02196d06bf..7eb5ff665a 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -6,6 +6,70 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// Instruction scheduling annotations for out-of-order CPUs. +// These annotations are independent of the itinerary class defined below. +// Here we define the subtarget independent read/write per-operand resources. +// The subtarget schedule definitions will then map these to the subtarget's +// resource usages. +// For example: +// The instruction cycle timings table might contain an entry for an operation +// like the following: +// Rd <- ADD Rn, Rm, <shift> Rs +// Uops | Latency from register | Uops - resource requirements - latency +// 2 | Rn: 1 Rm: 4 Rs: 4 | uop T0, Rm, Rs - P01 - 3 +// | | uopc Rd, Rn, T0 - P01 - 1 +// This is telling us that the result will be available in destination register +// Rd after a minimum of three cycles after the result in Rm and Rs is available +// and one cycle after the result in Rn is available. The micro-ops can execute +// on resource P01. +// To model this, we need to express that we need to dispatch two micro-ops, +// that the resource P01 is needed and that the latency to Rn is different than +// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by +// two. +// We will do this by assigning (abstract) resources to register defs/uses. +// ARMSchedule.td: +// def WriteALUsr : SchedWrite; +// def ReadAdvanceALUsr : ScheRead; +// +// ARMInstrInfo.td: +// def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault, +// ReadDefault]> { ...} +// ReadAdvance read resources allow us to define "pipeline by-passes" or +// shorter latencies to certain registers as needed in the example above. +// The "ReadDefault" can be omitted. +// Next, the subtarget td file assigns resources to the abstract resources +// defined here. +// ARMScheduleSubtarget.td: +// // Resources. +// def P01 : ProcResource<3>; // ALU unit (3 of it). +// ... +// // Resource usages. +// def : WriteRes<WriteALUsr, [P01, P01]> { +// Latency = 4; // Latency of 4. +// NumMicroOps = 2; // Dispatch 2 micro-ops. +// // The two instances of resource P01 are occupied for one cycle. It is one +// // cycle because these resources happen to be pipelined. +// ResourceCycles = [1, 1]; +// } +// def : ReadAdvance<ReadAdvanceALUsr, 3>; + +// Basic ALU operation. +def WriteALU : SchedWrite; +def ReadALU : SchedRead; + +// Basic ALU with shifts. +def WriteALUsi : SchedWrite; // Shift by immediate. +def WriteALUsr : SchedWrite; // Shift by register. +def WriteALUSsr : SchedWrite; // Shift by register (flag setting). +def ReadALUsr : SchedRead; // Some operands are read later. + +// Define TII for use in SchedVariant Predicates. +def : PredicateProlog<[{ + const ARMBaseInstrInfo *TII = + static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); + (void)TII; +}]>; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 4191931a5a..382e6cc4cd 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1898,6 +1898,8 @@ def CortexA9Model : SchedMachineModel { //===----------------------------------------------------------------------===// // Define each kind of processor resource and number available. +let SchedModel = CortexA9Model in { + def A9UnitALU : ProcResource<2>; def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; } def A9UnitAGU : ProcResource<1>; @@ -1918,11 +1920,11 @@ def A9WriteI : SchedWriteRes<[A9UnitALU]>; def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } // Basic ALU. -def A9WriteA : SchedWriteRes<[A9UnitALU]>; +def : WriteRes<WriteALU, [A9UnitALU]>; // ALU with operand shifted by immediate. -def A9WriteAsi : SchedWriteRes<[A9UnitALU]> { let Latency = 2; } +def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; } // ALU with operand shifted by register. -def A9WriteAsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } +def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; } // Multiplication def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; } @@ -2003,13 +2005,6 @@ foreach NumCycles = 2-8 in { def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>; } // foreach NumCycles -// Define TII for use in SchedVariant Predicates. -def : PredicateProlog<[{ - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo()); - (void)TII; -}]>; - // Define address generation sequences and predicates for 8 flavors of LDMs. foreach NumAddr = 1-8 in { @@ -2254,11 +2249,11 @@ def A9WriteLMfp : SchedWriteVariant<[ // These mov immediate writers are unconditionally expanded with // additive latency. def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>; -def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, A9WriteA]>; +def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>; def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>; // Some ALU operations can read loaded integer values one cycle early. -def A9ReadA : SchedReadAdvance<1, +def A9ReadALU : SchedReadAdvance<1, [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi, A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4, A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8, @@ -2279,26 +2274,25 @@ def A9Read4 : SchedReadAdvance<3>; // This table follows the ARM Cortex-A9 Technical Reference Manuals, // mostly in order. -let SchedModel = CortexA9Model in { def :ItinRW<[A9WriteI], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi, IIC_iMVNi,IIC_iMVNsi, IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; -def :ItinRW<[A9WriteI,A9ReadA],[IIC_iMVNr]>; +def :ItinRW<[A9WriteI,ReadALU],[IIC_iMVNr]>; def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>; def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>; def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>; -def :ItinRW<[A9WriteA], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; -def :ItinRW<[A9WriteA, A9ReadA], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; -def :ItinRW<[A9WriteA, A9ReadA, A9ReadA],[IIC_iALUr,IIC_iCMPr]>; -def :ItinRW<[A9WriteAsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; -def :ItinRW<[A9WriteAsi, A9ReadA], [IIC_iALUsi]>; -def :ItinRW<[A9WriteAsi, ReadDefault, A9ReadA], [IIC_iALUsir]>; // RSB -def :ItinRW<[A9WriteAsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; -def :ItinRW<[A9WriteAsr, A9ReadA], [IIC_iALUsr,IIC_iCMPsr]>; +def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>; +def :ItinRW<[WriteALU, ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>; +def :ItinRW<[WriteALU, ReadALU, ReadALU],[IIC_iALUr,IIC_iCMPr]>; +def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>; +def :ItinRW<[WriteALUsi, ReadALU], [IIC_iALUsi]>; +def :ItinRW<[WriteALUsi, ReadDefault, ReadALU], [IIC_iALUsir]>; // RSB +def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>; +def :ItinRW<[A9WriteALUsr, ReadALU], [IIC_iALUsr,IIC_iCMPsr]>; // A9WriteHi ignored for MUL32. def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, @@ -2371,7 +2365,7 @@ def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu, IIC_iStore_m, IIC_iStore_mu]>; def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>; -def :ItinRW<[A9WriteL, A9WriteAdr, A9WriteA], [IIC_iLoadiALU]>; +def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>; def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>; @@ -2486,4 +2480,11 @@ def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>; def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>; def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>; def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>; + +// Map SchedRWs that are identical for cortexa9 to existing resources. +def : SchedAlias<WriteALUsr, A9WriteALUsr>; +def : SchedAlias<WriteALUSsr, A9WriteALUsr>; +def : SchedAlias<ReadALU, A9ReadALU>; +def : SchedAlias<ReadALUsr, A9ReadALU>; + } // SchedModel = CortexA9Model diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index e9bc3e0f39..28bb429feb 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -1078,8 +1078,29 @@ def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. let MinLatency = 0; // Data dependencies are allowed within dispatch groups. let LoadLatency = 3; + let MispredictPenalty = 14; // A branch direction mispredict. let Itineraries = SwiftItineraries; } -// TODO: Add Swift processor and scheduler resources. +// Swift resource mapping. +let SchedModel = SwiftModel in { + // Processor resources. + def SwiftUnitP01 : ProcResource<2>; // ALU unit. + def SwiftUnitP0 : ProcResource<1> { let Super = SwiftUnitP01; } // Mul unit. + def SwiftUnitP1 : ProcResource<1> { let Super = SwiftUnitP01; } // Br unit. + def SwiftUnitP2 : ProcResource<1>; // LS unit. + def SwiftUnitDiv : ProcResource<1>; + + // 4.2.4 Arithmetic and Logical. + // ADC,ADD,NEG,RSB,RSC,SBC,SUB,ADR + // AND,BIC, EOR,ORN,ORR + // CLZ,RBIT,REV,REV16,REVSH,PKH + // Single cycle. + def : WriteRes<WriteALU, [SwiftUnitP01]>; + def : WriteRes<WriteALUsi, [SwiftUnitP01]>; + def : WriteRes<WriteALUsr, [SwiftUnitP01]>; + def : WriteRes<WriteALUSsr, [SwiftUnitP01]>; + def : ReadAdvance<ReadALU, 0>; + def : ReadAdvance<ReadALUsr, 2>; +} diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index e11314d4fc..739300e4ef 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -42,12 +43,13 @@ StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) + const std::string &FS, const TargetOptions &Options) : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) , stackAlignment(4) , CPUString(CPU) , TargetTriple(TT) + , Options(Options) , TargetABI(ARM_ABI_APCS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); @@ -92,6 +94,7 @@ void ARMSubtarget::initializeEnvironment() { AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; + UnsafeFPMath = false; } void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { @@ -162,6 +165,12 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // configuration. if (!StrictAlign && hasV6Ops() && isTargetDarwin()) AllowsUnalignedMem = true; + + // NEON f32 ops are non-IEEE 754 compliant. Darwin is ok with it by default. + uint64_t Bits = getFeatureBits(); + if ((Bits & ARM::ProcA5 || Bits & ARM::ProcA8) && // Where this matters + (Options.UnsafeFPMath || isTargetDarwin())) + UseNEONForSinglePrecisionFP = true; } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8ce22e1de2..5b5ee6aeb8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -26,6 +26,7 @@ namespace llvm { class GlobalValue; class StringRef; +class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: @@ -159,6 +160,9 @@ protected: /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; + /// Target machine allowed unsafe FP math (such as use of NEON fp) + bool UnsafeFPMath; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -175,6 +179,9 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; + /// Options passed via command line that could influence the target + const TargetOptions &Options; + public: enum { isELF, isDarwin @@ -189,7 +196,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 774521852a..42c7d2c437 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,11 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +static cl::opt<bool> +DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, + cl::desc("Inhibit optimization of S->D register accesses on A15"), + cl::init(false)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -43,7 +48,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), + Subtarget(TT, CPU, FS, Options), JITInfo(), InstrItins(Subtarget.getInstrItineraryData()) { // Default to soft float ABI @@ -164,6 +169,12 @@ bool ARMPassConfig::addPreRegAlloc() { addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isLikeA9()) addPass(createMLxExpansionPass()); + // Since the A15SDOptimizer pass can insert VDUP instructions, it can only be + // enabled when NEON is available. + if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA15() && + getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { + addPass(createA15SDOptimizerPass()); + } return true; } diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 01c04b48cf..1019b972e9 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -177,6 +177,23 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // Single to/from double precision conversions. + static const CostTblEntry<MVT> NEONFltDblTbl[] = { + // Vector fptrunc/fpext conversions. + { ISD::FP_ROUND, MVT::v2f64, 2 }, + { ISD::FP_EXTEND, MVT::v2f32, 2 }, + { ISD::FP_EXTEND, MVT::v4f32, 4 } + }; + + if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || + ISD == ISD::FP_EXTEND)) { + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); + int Idx = CostTableLookup<MVT>(NEONFltDblTbl, array_lengthof(NEONFltDblTbl), + ISD, LT.second); + if (Idx != -1) + return LT.first * NEONFltDblTbl[Idx].Cost; + } + EVT SrcTy = TLI->getValueType(Src); EVT DstTy = TLI->getValueType(Dst); @@ -194,17 +211,71 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + // The number of vmovl instructions for the extension. + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + + // Operations that we legalize using load/stores to the stack. + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4*1 + 16*2 + 2*1 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2*1 + 8*2 + 1 }, + // Vector float <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, // Vector double <-> i32 conversions. { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 } + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, + { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, + { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } }; if (SrcTy.isVector() && ST->hasNEON()) { @@ -247,7 +318,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return NEONFloatConversionTbl[Idx].Cost; } - // Scalar integer to float conversions. static const TypeConversionCostTblEntry<MVT> NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, @@ -303,7 +373,6 @@ unsigned ARMTTI::getCastInstrCost(unsigned Opcode, Type *Dst, return ARMIntegerConversionTbl[Idx].Cost; } - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); } @@ -326,6 +395,25 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. + static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6c678fdbd7..ed7b7ec9d2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -316,103 +316,127 @@ class ARMOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; SmallVector<unsigned, 8> Registers; + struct CCOp { + ARMCC::CondCodes Val; + }; + + struct CopOp { + unsigned Val; + }; + + struct CoprocOptionOp { + unsigned Val; + }; + + struct ITMaskOp { + unsigned Mask:4; + }; + + struct MBOptOp { + ARM_MB::MemBOpt Val; + }; + + struct IFlagsOp { + ARM_PROC::IFlags Val; + }; + + struct MMaskOp { + unsigned Val; + }; + + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + // A vector register list is a sequential list of 1 to 4 registers. + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned LaneIndex; + bool isDoubleSpaced; + }; + + struct VectorIndexOp { + unsigned Val; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + /// Combined record for all forms of ARM address expressions. + struct MemoryOp { + unsigned BaseRegNum; + // Offset is in OffsetReg or OffsetImm. If both are zero, no offset + // was specified. + const MCConstantExpr *OffsetImm; // Offset immediate value + unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL + ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg + unsigned ShiftImm; // shift for OffsetReg. + unsigned Alignment; // 0 = no alignment specified + // n = alignment in bytes (2, 4, 8, 16, or 32) + unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) + }; + + struct PostIdxRegOp { + unsigned RegNum; + bool isAdd; + ARM_AM::ShiftOpc ShiftTy; + unsigned ShiftImm; + }; + + struct ShifterImmOp { + bool isASR; + unsigned Imm; + }; + + struct RegShiftedRegOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftReg; + unsigned ShiftImm; + }; + + struct RegShiftedImmOp { + ARM_AM::ShiftOpc ShiftTy; + unsigned SrcReg; + unsigned ShiftImm; + }; + + struct RotImmOp { + unsigned Imm; + }; + + struct BitfieldOp { + unsigned LSB; + unsigned Width; + }; + union { - struct { - ARMCC::CondCodes Val; - } CC; - - struct { - unsigned Val; - } Cop; - - struct { - unsigned Val; - } CoprocOption; - - struct { - unsigned Mask:4; - } ITMask; - - struct { - ARM_MB::MemBOpt Val; - } MBOpt; - - struct { - ARM_PROC::IFlags Val; - } IFlags; - - struct { - unsigned Val; - } MMask; - - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - // A vector register list is a sequential list of 1 to 4 registers. - struct { - unsigned RegNum; - unsigned Count; - unsigned LaneIndex; - bool isDoubleSpaced; - } VectorList; - - struct { - unsigned Val; - } VectorIndex; - - struct { - const MCExpr *Val; - } Imm; - - /// Combined record for all forms of ARM address expressions. - struct { - unsigned BaseRegNum; - // Offset is in OffsetReg or OffsetImm. If both are zero, no offset - // was specified. - const MCConstantExpr *OffsetImm; // Offset immediate value - unsigned OffsetRegNum; // Offset register num, when OffsetImm == NULL - ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg - unsigned ShiftImm; // shift for OffsetReg. - unsigned Alignment; // 0 = no alignment specified - // n = alignment in bytes (2, 4, 8, 16, or 32) - unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit) - } Memory; - - struct { - unsigned RegNum; - bool isAdd; - ARM_AM::ShiftOpc ShiftTy; - unsigned ShiftImm; - } PostIdxReg; - - struct { - bool isASR; - unsigned Imm; - } ShifterImm; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftReg; - unsigned ShiftImm; - } RegShiftedReg; - struct { - ARM_AM::ShiftOpc ShiftTy; - unsigned SrcReg; - unsigned ShiftImm; - } RegShiftedImm; - struct { - unsigned Imm; - } RotImm; - struct { - unsigned LSB; - unsigned Width; - } Bitfield; + struct CCOp CC; + struct CopOp Cop; + struct CoprocOptionOp CoprocOption; + struct MBOptOp MBOpt; + struct ITMaskOp ITMask; + struct IFlagsOp IFlags; + struct MMaskOp MMask; + struct TokOp Tok; + struct RegOp Reg; + struct VectorListOp VectorList; + struct VectorIndexOp VectorIndex; + struct ImmOp Imm; + struct MemoryOp Memory; + struct PostIdxRegOp PostIdxReg; + struct ShifterImmOp ShifterImm; + struct RegShiftedRegOp RegShiftedReg; + struct RegShiftedImmOp RegShiftedImm; + struct RotImmOp RotImm; + struct BitfieldOp Bitfield; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -4569,20 +4593,26 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, Error(Parser.getTok().getLoc(), "unexpected token in operand"); return true; case AsmToken::Identifier: { - if (!tryParseRegisterWithWriteBack(Operands)) - return false; - int Res = tryParseShiftRegister(Operands); - if (Res == 0) // success - return false; - else if (Res == -1) // irrecoverable error - return true; - // If this is VMRS, check for the apsr_nzcv operand. - if (Mnemonic == "vmrs" && - Parser.getTok().getString().equals_lower("apsr_nzcv")) { - S = Parser.getTok().getLoc(); - Parser.Lex(); - Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); - return false; + // If we've seen a branch mnemonic, the next operand must be a label. This + // is true even if the label is a register name. So "br r1" means branch to + // label "r1". + bool ExpectLabel = Mnemonic == "b" || Mnemonic == "bl"; + if (!ExpectLabel) { + if (!tryParseRegisterWithWriteBack(Operands)) + return false; + int Res = tryParseShiftRegister(Operands); + if (Res == 0) // success + return false; + else if (Res == -1) // irrecoverable error + return true; + // If this is VMRS, check for the apsr_nzcv operand. + if (Mnemonic == "vmrs" && + Parser.getTok().getString().equals_lower("apsr_nzcv")) { + S = Parser.getTok().getLoc(); + Parser.Lex(); + Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S)); + return false; + } } // Fall though for the Identifier case that is not a register or a diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 586834cf73..b832508a08 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -15,6 +15,7 @@ tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen + A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 31a3b0b524..2e009e55e3 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -3049,9 +3049,9 @@ static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val, static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4, + if (!tryAddingSymbolicOperand(Address, Address + (Val<<1) + 4, true, 2, Inst, Decoder)) - Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1))); + Inst.addOperand(MCOperand::CreateImm(Val << 1)); return MCDisassembler::Success; } @@ -3278,7 +3278,7 @@ static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; } - if (!Check(S, DecoderGPRRegisterClass(Inst, Rt, Address, Decoder))) + if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; if (load) { diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index 463c440852..a64707e6f3 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -173,7 +173,6 @@ GCC is doing a couple of clever things here: mov r1, #1 lsl r1, r1, #8 tst r2, r1 - //===---------------------------------------------------------------------===// @@ -196,7 +195,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack objects are referenced off the frame pointer with negative offsets. See oggenc for an example. - //===---------------------------------------------------------------------===// Poor codegen test/CodeGen/ARM/select.ll f7: diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 609d502aa5..7452fb776e 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -588,7 +588,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // means the stack pointer cannot be used to access the emergency spill slot // when !hasReservedCallFrame(). #ifndef NDEBUG - if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){ + if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 604abf93cc..3e69098edc 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -518,7 +518,6 @@ void CppWriter::printAttributes(const AttributeSet &PAL, attrs.removeAttribute(Attribute::StackAlignment); } - assert(!attrs.hasAttributes() && "Unhandled attribute!"); Out << "PAS = AttributeSet::get(mod->getContext(), "; if (index == ~0U) Out << "~0U,"; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 62aed1353c..178662447a 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -701,7 +701,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // If the induction variable bump is not a power of 2, quit. // Othwerise we'd need a general integer division. - if (!isPowerOf2_64(abs(IVBump))) + if (!isPowerOf2_64(abs64(IVBump))) return 0; MachineBasicBlock *PH = Loop->getLoopPreheader(); @@ -1430,7 +1430,6 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( return 0; typedef MachineBasicBlock::instr_iterator instr_iterator; - typedef MachineBasicBlock::pred_iterator pred_iterator; // Verify that all existing predecessors have analyzable branches // (or no branches at all). diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 3a1c48bac9..8fc9ba1ee8 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -113,6 +113,46 @@ public: SDNode *SelectAdd(SDNode *N); bool isConstExtProfitable(SDNode *N) const; +// XformMskToBitPosU5Imm - Returns the bit position which +// the single bit 32 bit mask represents. +// Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU5Imm(uint32_t Imm) { + int32_t bitPos; + bitPos = Log2_32(Imm); + assert(bitPos >= 0 && bitPos < 32 && + "Constant out of range for 32 BitPos Memops"); + return CurDAG->getTargetConstant(bitPos, MVT::i32); +} + +// XformMskToBitPosU4Imm - Returns the bit position which the single bit 16 bit +// mask represents. Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU4Imm(uint16_t Imm) { + return XformMskToBitPosU5Imm(Imm); +} + +// XformMskToBitPosU3Imm - Returns the bit position which the single bit 8 bit +// mask represents. Used in Clr and Set bit immediate memops. +SDValue XformMskToBitPosU3Imm(uint8_t Imm) { + return XformMskToBitPosU5Imm(Imm); +} + +// Return true if there is exactly one bit set in V, i.e., if V is one of the +// following integers: 2^0, 2^1, ..., 2^31. +bool ImmIsSingleBit(uint32_t v) const { + uint32_t c = CountPopulation_64(v); + // Only return true if we counted 1 bit. + return c == 1; +} + +// XformM5ToU5Imm - Return a target constant with the specified value, of type +// i32 where the negative literal is transformed into a positive literal for +// use in -= memops. +inline SDValue XformM5ToU5Imm(signed Imm) { + assert( (Imm >= -31 && Imm <= -1) && "Constant out of range for Memops"); + return CurDAG->getTargetConstant( - Imm, MVT::i32); +} + + // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range // [1..128], used in cmpb.gtu instructions. inline SDValue XformU7ToU7M1Imm(signed Imm) { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 4c0f93c6cd..60b12ac01c 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -537,6 +537,15 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return(0); } +MachineInstr* +HexagonInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(Hexagon::DBG_VALUE)) + .addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { @@ -1881,6 +1890,13 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); } +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + + assert(isPredicated(MI)); + return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); +} + bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const { @@ -1949,6 +1965,10 @@ isValidOffset(const int Opcode, const int Offset) const { // the given "Opcode". If "Offset" is not in the correct range, "ADD_ri" is // inserted to calculate the final address. Due to this reason, the function // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. switch(Opcode) { @@ -1958,7 +1978,6 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STriw_indexed: case Hexagon::STriw: case Hexagon::STriw_f: - assert((Offset % 4 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMW_OFFSET_MIN) && (Offset <= Hexagon_MEMW_OFFSET_MAX); @@ -1968,14 +1987,12 @@ isValidOffset(const int Opcode, const int Offset) const { case Hexagon::STrid: case Hexagon::STrid_indexed: case Hexagon::STrid_f: - assert((Offset % 8 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMD_OFFSET_MIN) && (Offset <= Hexagon_MEMD_OFFSET_MAX); case Hexagon::LDrih: case Hexagon::LDriuh: case Hexagon::STrih: - assert((Offset % 2 == 0) && "Offset has incorrect alignment"); return (Offset >= Hexagon_MEMH_OFFSET_MIN) && (Offset <= Hexagon_MEMH_OFFSET_MAX); @@ -1990,48 +2007,28 @@ isValidOffset(const int Opcode, const int Offset) const { return (Offset >= Hexagon_ADDI_OFFSET_MIN) && (Offset <= Hexagon_ADDI_OFFSET_MAX); - case Hexagon::MEMw_ADDi_indexed_MEM_V4 : - case Hexagon::MEMw_SUBi_indexed_MEM_V4 : - case Hexagon::MEMw_ADDr_indexed_MEM_V4 : - case Hexagon::MEMw_SUBr_indexed_MEM_V4 : - case Hexagon::MEMw_ANDr_indexed_MEM_V4 : - case Hexagon::MEMw_ORr_indexed_MEM_V4 : - case Hexagon::MEMw_ADDi_MEM_V4 : - case Hexagon::MEMw_SUBi_MEM_V4 : - case Hexagon::MEMw_ADDr_MEM_V4 : - case Hexagon::MEMw_SUBr_MEM_V4 : - case Hexagon::MEMw_ANDr_MEM_V4 : - case Hexagon::MEMw_ORr_MEM_V4 : - assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." ); + case Hexagon::MemOPw_ADDi_V4 : + case Hexagon::MemOPw_SUBi_V4 : + case Hexagon::MemOPw_ADDr_V4 : + case Hexagon::MemOPw_SUBr_V4 : + case Hexagon::MemOPw_ANDr_V4 : + case Hexagon::MemOPw_ORr_V4 : return (0 <= Offset && Offset <= 255); - case Hexagon::MEMh_ADDi_indexed_MEM_V4 : - case Hexagon::MEMh_SUBi_indexed_MEM_V4 : - case Hexagon::MEMh_ADDr_indexed_MEM_V4 : - case Hexagon::MEMh_SUBr_indexed_MEM_V4 : - case Hexagon::MEMh_ANDr_indexed_MEM_V4 : - case Hexagon::MEMh_ORr_indexed_MEM_V4 : - case Hexagon::MEMh_ADDi_MEM_V4 : - case Hexagon::MEMh_SUBi_MEM_V4 : - case Hexagon::MEMh_ADDr_MEM_V4 : - case Hexagon::MEMh_SUBr_MEM_V4 : - case Hexagon::MEMh_ANDr_MEM_V4 : - case Hexagon::MEMh_ORr_MEM_V4 : - assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." ); + case Hexagon::MemOPh_ADDi_V4 : + case Hexagon::MemOPh_SUBi_V4 : + case Hexagon::MemOPh_ADDr_V4 : + case Hexagon::MemOPh_SUBr_V4 : + case Hexagon::MemOPh_ANDr_V4 : + case Hexagon::MemOPh_ORr_V4 : return (0 <= Offset && Offset <= 127); - case Hexagon::MEMb_ADDi_indexed_MEM_V4 : - case Hexagon::MEMb_SUBi_indexed_MEM_V4 : - case Hexagon::MEMb_ADDr_indexed_MEM_V4 : - case Hexagon::MEMb_SUBr_indexed_MEM_V4 : - case Hexagon::MEMb_ANDr_indexed_MEM_V4 : - case Hexagon::MEMb_ORr_indexed_MEM_V4 : - case Hexagon::MEMb_ADDi_MEM_V4 : - case Hexagon::MEMb_SUBi_MEM_V4 : - case Hexagon::MEMb_ADDr_MEM_V4 : - case Hexagon::MEMb_SUBr_MEM_V4 : - case Hexagon::MEMb_ANDr_MEM_V4 : - case Hexagon::MEMb_ORr_MEM_V4 : + case Hexagon::MemOPb_ADDi_V4 : + case Hexagon::MemOPb_SUBi_V4 : + case Hexagon::MemOPb_ADDr_V4 : + case Hexagon::MemOPb_SUBr_V4 : + case Hexagon::MemOPb_ANDr_V4 : + case Hexagon::MemOPb_ORr_V4 : return (0 <= Offset && Offset <= 63); // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of @@ -2087,44 +2084,33 @@ isMemOp(const MachineInstr *MI) const { switch (MI->getOpcode()) { default: return false; - case Hexagon::MEMw_ADDi_indexed_MEM_V4 : - case Hexagon::MEMw_SUBi_indexed_MEM_V4 : - case Hexagon::MEMw_ADDr_indexed_MEM_V4 : - case Hexagon::MEMw_SUBr_indexed_MEM_V4 : - case Hexagon::MEMw_ANDr_indexed_MEM_V4 : - case Hexagon::MEMw_ORr_indexed_MEM_V4 : - case Hexagon::MEMw_ADDi_MEM_V4 : - case Hexagon::MEMw_SUBi_MEM_V4 : - case Hexagon::MEMw_ADDr_MEM_V4 : - case Hexagon::MEMw_SUBr_MEM_V4 : - case Hexagon::MEMw_ANDr_MEM_V4 : - case Hexagon::MEMw_ORr_MEM_V4 : - case Hexagon::MEMh_ADDi_indexed_MEM_V4 : - case Hexagon::MEMh_SUBi_indexed_MEM_V4 : - case Hexagon::MEMh_ADDr_indexed_MEM_V4 : - case Hexagon::MEMh_SUBr_indexed_MEM_V4 : - case Hexagon::MEMh_ANDr_indexed_MEM_V4 : - case Hexagon::MEMh_ORr_indexed_MEM_V4 : - case Hexagon::MEMh_ADDi_MEM_V4 : - case Hexagon::MEMh_SUBi_MEM_V4 : - case Hexagon::MEMh_ADDr_MEM_V4 : - case Hexagon::MEMh_SUBr_MEM_V4 : - case Hexagon::MEMh_ANDr_MEM_V4 : - case Hexagon::MEMh_ORr_MEM_V4 : - case Hexagon::MEMb_ADDi_indexed_MEM_V4 : - case Hexagon::MEMb_SUBi_indexed_MEM_V4 : - case Hexagon::MEMb_ADDr_indexed_MEM_V4 : - case Hexagon::MEMb_SUBr_indexed_MEM_V4 : - case Hexagon::MEMb_ANDr_indexed_MEM_V4 : - case Hexagon::MEMb_ORr_indexed_MEM_V4 : - case Hexagon::MEMb_ADDi_MEM_V4 : - case Hexagon::MEMb_SUBi_MEM_V4 : - case Hexagon::MEMb_ADDr_MEM_V4 : - case Hexagon::MEMb_SUBr_MEM_V4 : - case Hexagon::MEMb_ANDr_MEM_V4 : - case Hexagon::MEMb_ORr_MEM_V4 : - return true; + case Hexagon::MemOPw_ADDi_V4 : + case Hexagon::MemOPw_SUBi_V4 : + case Hexagon::MemOPw_ADDr_V4 : + case Hexagon::MemOPw_SUBr_V4 : + case Hexagon::MemOPw_ANDr_V4 : + case Hexagon::MemOPw_ORr_V4 : + case Hexagon::MemOPh_ADDi_V4 : + case Hexagon::MemOPh_SUBi_V4 : + case Hexagon::MemOPh_ADDr_V4 : + case Hexagon::MemOPh_SUBr_V4 : + case Hexagon::MemOPh_ANDr_V4 : + case Hexagon::MemOPh_ORr_V4 : + case Hexagon::MemOPb_ADDi_V4 : + case Hexagon::MemOPb_SUBi_V4 : + case Hexagon::MemOPb_ADDr_V4 : + case Hexagon::MemOPb_SUBr_V4 : + case Hexagon::MemOPb_ANDr_V4 : + case Hexagon::MemOPb_ORr_V4 : + case Hexagon::MemOPb_SETBITi_V4: + case Hexagon::MemOPh_SETBITi_V4: + case Hexagon::MemOPw_SETBITi_V4: + case Hexagon::MemOPb_CLRBITi_V4: + case Hexagon::MemOPh_CLRBITi_V4: + case Hexagon::MemOPw_CLRBITi_V4: + return true; } + return false; } @@ -2383,6 +2369,13 @@ isConditionalStore (const MachineInstr* MI) const { } } +// Returns true, if any one of the operands is a dot new +// insn, whether it is predicated dot new or register dot new. +bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { + return (isNewValueInst(MI) || + (isPredicated(MI) && isPredicatedNew(MI))); +} + unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { const uint64_t F = MI->getDesc().TSFlags; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index d2f059aa79..5df13a88b5 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -127,6 +127,7 @@ public: const BranchProbability &Probability) const; virtual bool isPredicated(const MachineInstr *MI) const; + virtual bool isPredicatedNew(const MachineInstr *MI) const; virtual bool DefinesPredicate(MachineInstr *MI, std::vector<MachineOperand> &Pred) const; virtual bool @@ -140,6 +141,11 @@ public: isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, const BranchProbability &Probability) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; virtual DFAPacketizer* CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const; @@ -170,6 +176,7 @@ public: bool isConditionalLoad (const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; bool isNewValueInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; bool isDeallocRet(const MachineInstr *MI) const; unsigned getInvertedPredicatedOpcode(const int Opc) const; bool isExtendable(const MachineInstr* MI) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index d7bab200f9..74dc0ca72a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -446,38 +446,58 @@ def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2, s8ExtPred:$src2, s8ImmPred:$src3)))]>; -// Shift halfword. -let isPredicable = 1 in -def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = aslh($src1)", - [(set (i32 IntRegs:$dst), (shl 16, (i32 IntRegs:$src1)))]>; +// ALU32 - aslh, asrh, sxtb, sxth, zxtb, zxth +multiclass ALU32_2op_Pbase<string mnemonic, bit isNot, bit isPredNew> { + let isPredicatedNew = isPredNew in + def NAME : ALU32Inst<(outs IntRegs:$dst), + (ins PredRegs:$src1, IntRegs:$src2), + !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", + ") $dst = ")#mnemonic#"($src2)">, + Requires<[HasV4T]>; +} -let isPredicable = 1 in -def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = asrh($src1)", - [(set (i32 IntRegs:$dst), (sra 16, (i32 IntRegs:$src1)))]>; +multiclass ALU32_2op_Pred<string mnemonic, bit PredNot> { + let isPredicatedFalse = PredNot in { + defm _c#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 0>; + // Predicate new + defm _cdn#NAME : ALU32_2op_Pbase<mnemonic, PredNot, 1>; + } +} -// Sign extend. -let isPredicable = 1 in -def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = sxtb($src1)", - [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i8))]>; +multiclass ALU32_2op_base<string mnemonic> { + let BaseOpcode = mnemonic in { + let isPredicable = 1, neverHasSideEffects = 1 in + def NAME : ALU32Inst<(outs IntRegs:$dst), + (ins IntRegs:$src1), + "$dst = "#mnemonic#"($src1)">; -let isPredicable = 1 in -def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = sxth($src1)", - [(set (i32 IntRegs:$dst), (sext_inreg (i32 IntRegs:$src1), i16))]>; - -// Zero extend. -let isPredicable = 1, neverHasSideEffects = 1 in -def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = zxtb($src1)", - []>; + let Predicates = [HasV4T], validSubTargets = HasV4SubT, isPredicated = 1, + neverHasSideEffects = 1 in { + defm Pt_V4 : ALU32_2op_Pred<mnemonic, 0>; + defm NotPt_V4 : ALU32_2op_Pred<mnemonic, 1>; + } + } +} + +defm ASLH : ALU32_2op_base<"aslh">, PredNewRel; +defm ASRH : ALU32_2op_base<"asrh">, PredNewRel; +defm SXTB : ALU32_2op_base<"sxtb">, PredNewRel; +defm SXTH : ALU32_2op_base<"sxth">, PredNewRel; +defm ZXTB : ALU32_2op_base<"zxtb">, PredNewRel; +defm ZXTH : ALU32_2op_base<"zxth">, PredNewRel; + +def : Pat <(shl (i32 IntRegs:$src1), (i32 16)), + (ASLH IntRegs:$src1)>; + +def : Pat <(sra (i32 IntRegs:$src1), (i32 16)), + (ASRH IntRegs:$src1)>; + +def : Pat <(sext_inreg (i32 IntRegs:$src1), i8), + (SXTB IntRegs:$src1)>; + +def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), + (SXTH IntRegs:$src1)>; -let isPredicable = 1, neverHasSideEffects = 1 in -def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), - "$dst = zxth($src1)", - []>; //===----------------------------------------------------------------------===// // ALU32/PERM - //===----------------------------------------------------------------------===// @@ -488,29 +508,30 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1), //===----------------------------------------------------------------------===// // Conditional combine. -let neverHasSideEffects = 1, isPredicated = 1 in +let neverHasSideEffects = 1, isPredicated = 1 in { def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1, isPredicated = 1 in +let isPredicatedFalse = 1 in def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1, isPredicated = 1 in +let isPredicatedNew = 1 in def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if ($src1.new) $dst = combine($src2, $src3)", []>; -let neverHasSideEffects = 1, isPredicated = 1 in +let isPredicatedNew = 1, isPredicatedFalse = 1 in def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), "if (!$src1.new) $dst = combine($src2, $src3)", []>; +} // Compare. defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; @@ -1009,20 +1030,6 @@ def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))), (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >; } -let neverHasSideEffects = 1 in -def LDrid_GP : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memd(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDd_GP : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global), - "$dst = memd(#$global)", - []>, - Requires<[NoV4T]>; - //===----------------------------------------------------------------------===// // Post increment load // Make sure that in post increment load, the first operand is always the post @@ -1095,27 +1102,6 @@ let AddedComplexity = 20 in def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))), (i32 (LDrib_indexed IntRegs:$src1, s11_0ImmPred:$offset)) >; -let neverHasSideEffects = 1 in -def LDrib_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memb(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDb_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memb(#$global)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDub_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memub(#$global)", - []>, - Requires<[NoV4T]>; - def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)), (i32 (LDrih ADDRriS11_1:$addr))>; @@ -1123,27 +1109,6 @@ let AddedComplexity = 20 in def : Pat < (i32 (extloadi16 (add IntRegs:$src1, s11_1ImmPred:$offset))), (i32 (LDrih_indexed IntRegs:$src1, s11_1ImmPred:$offset)) >; -let neverHasSideEffects = 1 in -def LDrih_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memh(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDh_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memh(#$global)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDuh_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memuh(#$global)", - []>, - Requires<[NoV4T]>; - let AddedComplexity = 10 in def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)), (i32 (LDriub ADDRriS11_0:$addr))>; @@ -1152,21 +1117,6 @@ let AddedComplexity = 20 in def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))), (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>; -let neverHasSideEffects = 1 in -def LDriub_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memub(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -// Load unsigned halfword. -let neverHasSideEffects = 1 in -def LDriuh_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memuh(#$global+$offset)", - []>, - Requires<[NoV4T]>; - // Load predicate. let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, isPseudo = 1, Defs = [R10,R11,D5], neverHasSideEffects = 1 in @@ -1175,21 +1125,6 @@ def LDriw_pred : LDInst2<(outs PredRegs:$dst), "Error; should not emit", []>; -// Indexed load. -let neverHasSideEffects = 1 in -def LDriw_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global, u16Imm:$offset), - "$dst = memw(#$global+$offset)", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def LDw_GP : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst = memw(#$global)", - []>, - Requires<[NoV4T]>; - // Deallocate stack frame. let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in { def DEALLOCFRAME : LDInst2<(outs), (ins), @@ -1423,28 +1358,8 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1, // ST + //===----------------------------------------------------------------------===// /// -/// Assumptions::: ****** DO NOT IGNORE ******** -/// 1. Make sure that in post increment store, the zero'th operand is always the -/// post increment operand. -/// 2. Make sure that the store value operand(Rt/Rtt) in a store is always the -/// last operand. -/// // Store doubleword. -let neverHasSideEffects = 1 in -def STrid_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src), - "memd(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STd_GP : STInst2<(outs), - (ins globaladdress:$global, DoubleRegs:$src), - "memd(#$global) = $src", - []>, - Requires<[NoV4T]>; - //===----------------------------------------------------------------------===// // Post increment store //===----------------------------------------------------------------------===// @@ -1655,36 +1570,6 @@ def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2, (i64 DoubleRegs:$src1))>; } -// memb(gp+#u16:0)=Rt -let neverHasSideEffects = 1 in -def STrib_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memb(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -// memb(#global)=Rt -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STb_GP : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memb(#$global) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1 in -def STrih_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memh(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STh_GP : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memh(#$global) = $src", - []>, - Requires<[NoV4T]>; - // memh(Rx++#s4:1)=Rt.H // Store word. @@ -1695,20 +1580,6 @@ def STriw_pred : STInst2<(outs), "Error; should not emit", []>; -let neverHasSideEffects = 1 in -def STriw_GP : STInst2<(outs), - (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src), - "memw(#$global+$offset) = $src", - []>, - Requires<[NoV4T]>; - -let neverHasSideEffects = 1, validSubTargets = NoV4SubT in -def STw_GP : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memw(#$global) = $src", - []>, - Requires<[NoV4T]>; - // Allocate stack frame. let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in { def ALLOCFRAME : STInst2<(outs), @@ -2183,68 +2054,26 @@ def : Pat<(HexagonTCRet (i32 IntRegs:$dst)), // Atomic load and store support // 8 bit atomic load -def : Pat<(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDub_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_8 ADDRriS11_0:$src1), (i32 (LDriub ADDRriS11_0:$src1))>; def : Pat<(atomic_load_8 (add (i32 IntRegs:$src1), s11_0ImmPred:$offset)), (i32 (LDriub_indexed (i32 IntRegs:$src1), s11_0ImmPred:$offset))>; - - // 16 bit atomic load -def : Pat<(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDuh_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_16 ADDRriS11_1:$src1), (i32 (LDriuh ADDRriS11_1:$src1))>; def : Pat<(atomic_load_16 (add (i32 IntRegs:$src1), s11_1ImmPred:$offset)), (i32 (LDriuh_indexed (i32 IntRegs:$src1), s11_1ImmPred:$offset))>; - - -// 32 bit atomic load -def : Pat<(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDw_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_32 ADDRriS11_2:$src1), (i32 (LDriw ADDRriS11_2:$src1))>; def : Pat<(atomic_load_32 (add (i32 IntRegs:$src1), s11_2ImmPred:$offset)), (i32 (LDriw_indexed (i32 IntRegs:$src1), s11_2ImmPred:$offset))>; - // 64 bit atomic load -def : Pat<(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), - (i64 (LDd_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - def : Pat<(atomic_load_64 ADDRriS11_3:$src1), (i64 (LDrid ADDRriS11_3:$src1))>; @@ -2252,30 +2081,6 @@ def : Pat<(atomic_load_64 (add (i32 IntRegs:$src1), s11_3ImmPred:$offset)), (i64 (LDrid_indexed (i32 IntRegs:$src1), s11_3ImmPred:$offset))>; -// 64 bit atomic store -def : Pat<(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), - (i64 DoubleRegs:$src1)), - (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i64 DoubleRegs:$src1)), - (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>; - -// 8 bit atomic store -def : Pat<(atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, Requires<[NoV4T]>; - def : Pat<(atomic_store_8 ADDRriS11_0:$src2, (i32 IntRegs:$src1)), (STrib ADDRriS11_0:$src2, (i32 IntRegs:$src1))>; @@ -2285,18 +2090,6 @@ def : Pat<(atomic_store_8 (add (i32 IntRegs:$src2), s11_0ImmPred:$offset), (i32 IntRegs:$src1))>; -// 16 bit atomic store -def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, Requires<[NoV4T]>; - def : Pat<(atomic_store_16 ADDRriS11_1:$src2, (i32 IntRegs:$src1)), (STrih ADDRriS11_1:$src2, (i32 IntRegs:$src1))>; @@ -2305,20 +2098,6 @@ def : Pat<(atomic_store_16 (i32 IntRegs:$src1), (STrih_indexed (i32 IntRegs:$src2), s11_1ImmPred:$offset, (i32 IntRegs:$src1))>; - -// 32 bit atomic store -def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STw_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset), - (i32 IntRegs:$src1)), - (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, - (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - def : Pat<(atomic_store_32 ADDRriS11_2:$src2, (i32 IntRegs:$src1)), (STriw ADDRriS11_2:$src2, (i32 IntRegs:$src1))>; @@ -2387,198 +2166,8 @@ def : Pat <(brcond (not PredRegs:$src1), bb:$offset), def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)), (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; -// Map from store(globaladdress + x) -> memd(#foo + x). -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, - (i64 DoubleRegs:$src1))>, Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memd(#foo). -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STd_GP tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress + x) -> memw(#foo + x). -let AddedComplexity = 100 in -def : Pat <(store (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memw(#foo + 0). -let AddedComplexity = 100 in -def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>; - -// Map from store(globaladdress) -> memw(#foo). -let AddedComplexity = 100 in -def : Pat <(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STriw_GP tglobaladdr:$global, 0, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress + x) -> memh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(truncstorei16 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memh(#foo). -let AddedComplexity = 100 in -def : Pat <(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STh_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress + x) -> memb(#foo + x). -let AddedComplexity = 100 in -def : Pat <(truncstorei8 (i32 IntRegs:$src1), - (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset)), - (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from store(globaladdress) -> memb(#foo). -let AddedComplexity = 100 in -def : Pat <(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memw(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memw(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDw_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memd(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i64 (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memw(#foo + 0). -let AddedComplexity = 100 in -def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i64 (LDd_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd. -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (TFR_PdRs (i32 (LDb_GP tglobaladdr:$global))))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDrih_GP tglobaladdr:$global, 0))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memuh(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memuh(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDriuh_GP tglobaladdr:$global, 0))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memh(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memuh(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDuh_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memb(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress + x) -> memub(#foo + x). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global), - u16ImmPred:$offset))), - (i32 (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memb(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memb(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// Map from load(globaladdress) -> memub(#foo). -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -// When the Interprocedural Global Variable optimizer realizes that a -// certain global variable takes only two constant values, it shrinks the -// global to a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP tglobaladdr:$global))>, - Requires<[NoV4T]>; - // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. +let AddedComplexity = 10 in def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), (i32 (AND_rr (i32 (LDrib ADDRriS11_0:$addr)), (TFRI 0x1)))>; @@ -2694,12 +2283,6 @@ def : Pat<(truncstorei32 (i64 DoubleRegs:$src), ADDRriS11_0:$addr), def : Pat<(store (i1 -1), ADDRriS11_2:$addr), (STrib ADDRriS11_2:$addr, (TFRI 1))>; -let AddedComplexity = 100 in -// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1; -// memw(#foo) = r0 -def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP tglobaladdr:$global, (TFRI 1))>, - Requires<[NoV4T]>; // Map from i1 = constant<-1>; store i1 -> r0 = 1; store r0. def : Pat<(store (i1 -1), ADDRriS11_2:$addr), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 1d0643d03b..cd0e475896 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -95,164 +95,6 @@ def NumUsesBelowThresCONST32 : PatFrag<(ops node:$addr), //===----------------------------------------------------------------------===// // ALU32 + //===----------------------------------------------------------------------===// - -// Shift halfword. - -let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in { -def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = aslh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; - -def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = asrh($src2)", - []>, - Requires<[HasV4T]>; -} - -// Sign extend. - -let isPredicated = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in { -def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - -def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - -def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - -def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = sxtb($src2)", - []>, - Requires<[HasV4T]>; - - -def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; - -def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; - -def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; - -def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = sxth($src2)", - []>, - Requires<[HasV4T]>; -} - -// Zero exten. - -let neverHasSideEffects = 1, isPredicated = 1, validSubTargets = HasV4SubT in { -def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = zxtb($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if ($src1.new) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; - -def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2), - "if (!$src1.new) $dst = zxth($src2)", - []>, - Requires<[HasV4T]>; -} - // Generate frame index addresses. let neverHasSideEffects = 1, isReMaterializable = 1, isExtended = 1, opExtendable = 2, validSubTargets = HasV4SubT in @@ -596,329 +438,6 @@ def : Pat <(i32 (load (add IntRegs:$src1, IntRegs:$src2))), Requires<[HasV4T]>; } -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDd_GP_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins globaladdress:$global), - "$dst=memd(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rtt=memd(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDd_GP_cPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rtt=memd(##global) -def LDd_GP_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rtt=memd(##global) -def LDd_GP_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rtt=memd(##global) -def LDd_GP_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memd(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDb_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memb(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memb(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDb_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memb(##global) -def LDb_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memb(##global) -def LDb_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memb(##global) -def LDb_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memb(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDub_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memub(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memub(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDub_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memub(##global) -def LDub_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memub(##global) -def LDub_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memub(##global) -def LDub_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memub(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDh_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memh(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memh(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memh(##global) -def LDh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memh(##global) -def LDh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memh(##global) -def LDh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memh(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDuh_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memuh(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memuh(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDuh_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memuh(##global) -def LDuh_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memuh(##global) -def LDuh_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; - -// if (!Pv) Rt=memuh(##global) -def LDuh_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memuh(##$global)", - []>, - Requires<[HasV4T]>; -} - -let isPredicable = 1, neverHasSideEffects = 1, validSubTargets = HasV4SubT in -def LDw_GP_V4 : LDInst2<(outs IntRegs:$dst), - (ins globaladdress:$global), - "$dst=memw(#$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memw(##global) -let neverHasSideEffects = 1, isPredicated = 1, isExtended = 1, opExtendable = 2, -validSubTargets = HasV4SubT in { -def LDw_GP_cPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memw(##global) -def LDw_GP_cNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; - -// if (Pv) Rt=memw(##global) -def LDw_GP_cdnPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if ($src1.new) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; - - -// if (!Pv) Rt=memw(##global) -def LDw_GP_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst), - (ins PredRegs:$src1, globaladdress:$global), - "if (!$src1.new) $dst=memw(##$global)", - []>, - Requires<[HasV4T]>; -} - - -def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), - (i64 (LDd_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDw_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDuh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), - (i32 (LDub_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memw(#foo + 0) -let AddedComplexity = 100 in -def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i64 (LDd_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>, - Requires<[HasV4T]>; - -// When the Interprocedural Global Variable optimizer realizes that a certain -// global variable takes only two constant values, it shrinks the global to -// a boolean. Catch those loads here in the following 3 patterns. -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDb_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memub(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDub_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memuh(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDuh_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - -// Map from load(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i32 (LDw_GP_V4 tglobaladdr:$global))>, - Requires<[HasV4T]>; - // zext i1->i64 def : Pat <(i64 (zext (i1 PredRegs:$src1))), (i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>, @@ -1374,225 +893,6 @@ def STriw_shl_V4 : STInst<(outs), // memw(Rx++I:circ(Mu))=Rt // memw(Rx++Mu)=Rt // memw(Rx++Mu:brev)=Rt -// memw(gp+#u16:2)=Rt - - -// memd(#global)=Rtt -let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1, -validSubTargets = HasV4SubT in -def STd_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, DoubleRegs:$src), - "memd(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memd(##global) = Rtt -let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STd_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if ($src1) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memd(##global) = Rtt -def STd_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if (!$src1) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memd(##global) = Rtt -def STd_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if ($src1.new) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memd(##global) = Rtt -def STd_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2), - "if (!$src1.new) memd(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// memb(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STb_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memb(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STb_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -def STb_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memb(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// memh(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STh_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memh(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STh_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -def STh_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memh(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// memw(#global)=Rt -let isPredicable = 1, neverHasSideEffects = 1, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STw_GP_V4 : STInst2<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memw(#$global) = $src", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -let neverHasSideEffects = 1, isPredicated = 1, isNVStorable = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STw_GP_cPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -def STw_GP_cdnPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cdnNotPt_V4 : STInst2<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memw(##$global) = $src2", - []>, - Requires<[HasV4T]>; -} - -// 64 bit atomic store -def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), - (i64 DoubleRegs:$src1)), - (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memd(#foo) -let AddedComplexity = 100 in -def : Pat <(store (i64 DoubleRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, - Requires<[HasV4T]>; - -// 8 bit atomic store -def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memb(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei8 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" -// to "r0 = 1; memw(#foo) = r0" -let AddedComplexity = 100 in -def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>, - Requires<[HasV4T]>; - -def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memh(#foo) -let AddedComplexity = 100 in -def : Pat<(truncstorei16 (i32 IntRegs:$src1), - (HexagonCONST32_GP tglobaladdr:$global)), - (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// 32 bit atomic store -def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), - (i32 IntRegs:$src1)), - (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; - -// Map from store(globaladdress) -> memw(#foo) -let AddedComplexity = 100 in -def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), - (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>, - Requires<[HasV4T]>; //===----------------------------------------------------------------------=== // ST - @@ -1772,15 +1072,6 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // memb(Rx++I:circ(Mu))=Nt.new // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new - -// memb(#global)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STb_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memb(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - // memh(Ru<<#u2+#U6)=Nt.new let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, isNVStore = 1, validSubTargets = HasV4SubT in @@ -1795,14 +1086,6 @@ def STrih_shl_nv_V4 : NVInst_V4<(outs), // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memh(#global)=Nt.new -let mayStore = 1, neverHasSideEffects = 1 in -def STh_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memh(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - // memw(Ru<<#u2+#U6)=Nt.new let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, isNVStore = 1, validSubTargets = HasV4SubT in @@ -1816,102 +1099,6 @@ def STriw_shl_nv_V4 : NVInst_V4<(outs), // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new // memw(Rx++Mu:brev)=Nt.new -// memw(gp+#u16:2)=Nt.new - -let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, -validSubTargets = HasV4SubT in -def STw_GP_nv_V4 : NVInst_V4<(outs), - (ins globaladdress:$global, IntRegs:$src), - "memw(#$global) = $src.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -let mayStore = 1, neverHasSideEffects = 1, isNVStore = 1, -isExtended = 1, opExtendable = 1, validSubTargets = HasV4SubT in { -def STb_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memb(##global) = Rt -def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memb(##global) = Rt -def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memb(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -def STh_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memh(##global) = Rt -def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memh(##global) = Rt -def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memh(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -def STw_GP_cPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (Pv) memw(##global) = Rt -def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if ($src1.new) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; - -// if (!Pv) memw(##global) = Rt -def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs), - (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2), - "if (!$src1.new) memw(##$global) = $src2.new", - []>, - Requires<[HasV4T]>; -} //===----------------------------------------------------------------------===// // NV/ST - @@ -2658,414 +1845,367 @@ def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst), // MEMOP: Word, Half, Byte //===----------------------------------------------------------------------===// +def MEMOPIMM : SDNodeXForm<imm, [{ + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int32_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def MEMOPIMM_HALF : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 65535..65515 + // assigning to a short restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int16_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def MEMOPIMM_BYTE : SDNodeXForm<imm, [{ + // -1 .. -31 represented as 255..235 + // assigning to a char restores our desired signed value. + // Call the transformation function XformM5ToU5Imm to get the negative + // immediate's positive counterpart. + int8_t imm = N->getSExtValue(); + return XformM5ToU5Imm(imm); +}]>; + +def SETMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-31]. + // As an SDNode. + int32_t imm = N->getSExtValue(); + return XformMskToBitPosU5Imm(imm); +}]>; + +def CLRMEMIMM : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-31]. + // As an SDNode. + // we bit negate the value first + int32_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU5Imm(imm); +}]>; + +def SETMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-15]. + // As an SDNode. + int16_t imm = N->getSExtValue(); + return XformMskToBitPosU4Imm(imm); +}]>; + +def CLRMEMIMM_SHORT : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-15]. + // As an SDNode. + // we bit negate the value first + int16_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU4Imm(imm); +}]>; + +def SETMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will set [0-7]. + // As an SDNode. + int8_t imm = N->getSExtValue(); + return XformMskToBitPosU3Imm(imm); +}]>; + +def CLRMEMIMM_BYTE : SDNodeXForm<imm, [{ + // Return the bit position we will clear [0-7]. + // As an SDNode. + // we bit negate the value first + int8_t imm = ~(N->getSExtValue()); + return XformMskToBitPosU3Imm(imm); +}]>; + +//===----------------------------------------------------------------------===// +// Template class for MemOp instructions with the register value. +//===----------------------------------------------------------------------===// +class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4<(outs), + (ins IntRegs:$base, ImmOp:$offset, IntRegs:$delta), + opc#"($base+#$offset)"#memOp#"$delta", + []>, + Requires<[HasV4T, UseMEMOP]> { + + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); + + let IClass = 0b0011; + let Inst{27-24} = 0b1110; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} + //===----------------------------------------------------------------------===// -// MEMOP: Word -// -// Implemented: -// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5 -// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5 -// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt -// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt -// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt -// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt -// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5 -// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5 -// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt -// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt -// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt -// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt -// -// Not implemented: -// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5) -// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5) +// Template class for MemOp instructions with the immediate value. //===----------------------------------------------------------------------===// +class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp, + string memOp, bits<2> memOpBits> : + MEMInst_V4 <(outs), + (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta), + opc#"($base+#$offset)"#memOp#"#$delta" + #!if(memOpBits{1},")", ""), // clrbit, setbit - include ')' + []>, + Requires<[HasV4T, UseMEMOP]> { + bits<5> base; + bits<5> delta; + bits<32> offset; + bits<6> offsetBits; // memb - u6:0 , memh - u6:1, memw - u6:2 + let offsetBits = !if (!eq(opcBits, 0b00), offset{5-0}, + !if (!eq(opcBits, 0b01), offset{6-1}, + !if (!eq(opcBits, 0b10), offset{7-2},0))); -// memw(Rs+#u6:2) += #U5 -let AddedComplexity = 30 in -def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend), - "memw($base+#$offset) += #$addend", - []>, - Requires<[HasV4T, UseMEMOP]>; + let IClass = 0b0011; + let Inst{27-24} = 0b1111; + let Inst{22-21} = opcBits; + let Inst{20-16} = base; + let Inst{13} = 0b0; + let Inst{12-7} = offsetBits; + let Inst{6-5} = memOpBits; + let Inst{4-0} = delta; +} -// memw(Rs+#u6:2) -= #U5 -let AddedComplexity = 30 in -def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend), - "memw($base+#$offset) -= #$subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) += Rt -let AddedComplexity = 30 in -def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend), - "memw($base+#$offset) += $addend", - [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$addend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) -= Rt -let AddedComplexity = 30 in -def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend), - "memw($base+#$offset) -= $subend", - [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$subend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) &= Rt -let AddedComplexity = 30 in -def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend), - "memw($base+#$offset) &= $andend", - [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$andend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) |= Rt -let AddedComplexity = 30 in -def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend), - "memw($base+#$offset) |= $orend", - [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)), - (i32 IntRegs:$orend)), - (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) += #U5 -let AddedComplexity = 30 in -def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$addend), - "memw($addr) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; +// multiclass to define MemOp instructions with register operand. +multiclass MemOp_rr<string opc, bits<2> opcBits, Operand ImmOp> { + def _ADD#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " += ", 0b00>; // add + def _SUB#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " -= ", 0b01>; // sub + def _AND#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " &= ", 0b10>; // and + def _OR#NAME#_V4 : MemOp_rr_base <opc, opcBits, ImmOp, " |= ", 0b11>; // or +} -// memw(Rs+#u6:2) -= #U5 -let AddedComplexity = 30 in -def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$subend), - "memw($addr) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) += Rt -let AddedComplexity = 30 in -def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$addend), - "memw($addr) += $addend", - [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) -= Rt -let AddedComplexity = 30 in -def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$subend), - "memw($addr) -= $subend", - [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) &= Rt -let AddedComplexity = 30 in -def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$andend), - "memw($addr) &= $andend", - [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memw(Rs+#u6:2) |= Rt -let AddedComplexity = 30 in -def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$orend), - "memw($addr) |= $orend", - [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)), - ADDRriU6_2:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; +// multiclass to define MemOp instructions with immediate Operand. +multiclass MemOp_ri<string opc, bits<2> opcBits, Operand ImmOp> { + def _ADD#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " += ", 0b00 >; + def _SUB#NAME#_V4 : MemOp_ri_base <opc, opcBits, ImmOp, " -= ", 0b01 >; + def _CLRBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =clrbit(", 0b10>; + def _SETBIT#NAME#_V4 : MemOp_ri_base<opc, opcBits, ImmOp, " =setbit(", 0b11>; +} + +multiclass MemOp_base <string opc, bits<2> opcBits, Operand ImmOp> { + defm r : MemOp_rr <opc, opcBits, ImmOp>; + defm i : MemOp_ri <opc, opcBits, ImmOp>; +} + +// Define MemOp instructions. +let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, +validSubTargets =HasV4SubT in { + let opExtentBits = 6, accessSize = ByteAccess in + defm MemOPb : MemOp_base <"memb", 0b00, u6_0Ext>; + + let opExtentBits = 7, accessSize = HalfWordAccess in + defm MemOPh : MemOp_base <"memh", 0b01, u6_1Ext>; + + let opExtentBits = 8, accessSize = WordAccess in + defm MemOPw : MemOp_base <"memw", 0b10, u6_2Ext>; +} //===----------------------------------------------------------------------===// -// MEMOP: Halfword -// -// Implemented: -// MEMh_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5 -// MEMh_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5 -// MEMh_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt -// MEMh_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt -// MEMh_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt -// MEMh_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt -// MEMh_ADDi_V4 : memw(Rs+#u6:2)+=#U5 -// MEMh_SUBi_V4 : memw(Rs+#u6:2)-=#U5 -// MEMh_ADDr_V4 : memw(Rs+#u6:2)+=Rt -// MEMh_SUBr_V4 : memw(Rs+#u6:2)-=Rt -// MEMh_CLRr_V4 : memw(Rs+#u6:2)&=Rt -// MEMh_SETr_V4 : memw(Rs+#u6:2)|=Rt -// -// Not implemented: -// MEMh_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMh_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5) -// MEMh_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5) -// MEMh_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5) +// Multiclass to define 'Def Pats' for ALU operations on the memory +// Here value used for the ALU operation is an immediate value. +// mem[bh](Rs+#0) += #U5 +// mem[bh](Rs+#u6) += #U5 //===----------------------------------------------------------------------===// +multiclass MemOpi_u5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, + InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 180 in + def : Pat < (stOp (OpNode (ldOp IntRegs:$addr), u5ImmPred:$addend), + IntRegs:$addr), + (MI IntRegs:$addr, #0, u5ImmPred:$addend )>; -// memh(Rs+#u6:1) += #U5 -let AddedComplexity = 30 in -def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend), - "memh($base+#$offset) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; + let AddedComplexity = 190 in + def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, ExtPred:$offset)), + u5ImmPred:$addend), + (add IntRegs:$base, ExtPred:$offset)), + (MI IntRegs:$base, ExtPred:$offset, u5ImmPred:$addend)>; +} -// memh(Rs+#u6:1) -= #U5 -let AddedComplexity = 30 in -def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend), - "memh($base+#$offset) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) += Rt -let AddedComplexity = 30 in -def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend), - "memh($base+#$offset) += $addend", - [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$addend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) -= Rt -let AddedComplexity = 30 in -def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend), - "memh($base+#$offset) -= $subend", - [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$subend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) &= Rt -let AddedComplexity = 30 in -def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend), - "memh($base+#$offset) += $andend", - [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$andend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) |= Rt -let AddedComplexity = 30 in -def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend), - "memh($base+#$offset) |= $orend", - [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base), - u6_1ImmPred:$offset)), - (i32 IntRegs:$orend)), - (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) += #U5 -let AddedComplexity = 30 in -def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$addend), - "memh($addr) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_u5ALUOp<PatFrag ldOp, PatFrag stOp, PatLeaf ExtPred, + InstHexagon addMI, InstHexagon subMI> { + defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, addMI, add>; + defm : MemOpi_u5Pats<ldOp, stOp, ExtPred, subMI, sub>; +} -// memh(Rs+#u6:1) -= #U5 -let AddedComplexity = 30 in -def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$subend), - "memh($addr) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) += Rt -let AddedComplexity = 30 in -def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$addend), - "memh($addr) += $addend", - [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) -= Rt -let AddedComplexity = 30 in -def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$subend), - "memh($addr) -= $subend", - [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) &= Rt -let AddedComplexity = 30 in -def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$andend), - "memh($addr) &= $andend", - [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memh(Rs+#u6:1) |= Rt -let AddedComplexity = 30 in -def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$orend), - "memh($addr) |= $orend", - [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr), - (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_u5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOpi_u5ALUOp <ldOpHalf, truncstorei16, u6_1ExtPred, + MemOPh_ADDi_V4, MemOPh_SUBi_V4>; + // Byte + defm : MemOpi_u5ALUOp <ldOpByte, truncstorei8, u6ExtPred, + MemOPb_ADDi_V4, MemOPb_SUBi_V4>; +} +let Predicates = [HasV4T, UseMEMOP] in { + defm : MemOpi_u5ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_u5ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_u5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm : MemOpi_u5ALUOp <load, store, u6_2ExtPred, MemOPw_ADDi_V4, + MemOPw_SUBi_V4>; +} //===----------------------------------------------------------------------===// -// MEMOP: Byte -// -// Implemented: -// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5 -// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5 -// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt -// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt -// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt -// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt -// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5 -// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5 -// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt -// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt -// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt -// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt -// -// Not implemented: -// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5) -// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5) -// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5) -// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5) +// multiclass to define 'Def Pats' for ALU operations on the memory. +// Here value used for the ALU operation is a negative value. +// mem[bh](Rs+#0) += #m5 +// mem[bh](Rs+#u6) += #m5 //===----------------------------------------------------------------------===// -// memb(Rs+#u6:0) += #U5 -let AddedComplexity = 30 in -def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend), - "memb($base+#$offset) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_m5Pats <PatFrag ldOp, PatFrag stOp, PatLeaf extPred, + PatLeaf immPred, ComplexPattern addrPred, + SDNodeXForm xformFunc, InstHexagon MI> { + let AddedComplexity = 190 in + def : Pat <(stOp (add (ldOp IntRegs:$addr), immPred:$subend), + IntRegs:$addr), + (MI IntRegs:$addr, #0, (xformFunc immPred:$subend) )>; -// memb(Rs+#u6:0) -= #U5 -let AddedComplexity = 30 in -def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend), - "memb($base+#$offset) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) += Rt -let AddedComplexity = 30 in -def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend), - "memb($base+#$offset) += $addend", - [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$addend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) -= Rt -let AddedComplexity = 30 in -def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend), - "memb($base+#$offset) -= $subend", - [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$subend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) &= Rt -let AddedComplexity = 30 in -def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend), - "memb($base+#$offset) += $andend", - [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$andend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) |= Rt -let AddedComplexity = 30 in -def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs), - (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend), - "memb($base+#$offset) |= $orend", - [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base), - u6_0ImmPred:$offset)), - (i32 IntRegs:$orend)), - (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) += #U5 -let AddedComplexity = 30 in -def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$addend), - "memb($addr) += $addend", - []>, - Requires<[HasV4T, UseMEMOP]>; + let AddedComplexity = 195 in + def : Pat<(stOp (add (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$subend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$subend))>; +} -// memb(Rs+#u6:0) -= #U5 -let AddedComplexity = 30 in -def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, u5Imm:$subend), - "memb($addr) -= $subend", - []>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) += Rt -let AddedComplexity = 30 in -def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$addend), - "memb($addr) += $addend", - [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) -= Rt -let AddedComplexity = 30 in -def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$subend), - "memb($addr) -= $subend", - [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) &= Rt -let AddedComplexity = 30 in -def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$andend), - "memb($addr) &= $andend", - [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; - -// memb(Rs+#u6:0) |= Rt -let AddedComplexity = 30 in -def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs), - (ins MEMri:$addr, IntRegs:$orend), - "memb($addr) |= $orend", - [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr), - (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>, - Requires<[HasV4T, UseMEMOP]>; +multiclass MemOpi_m5ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOpi_m5Pats <ldOpHalf, truncstorei16, u6_1ExtPred, m5HImmPred, + ADDRriU6_1, MEMOPIMM_HALF, MemOPh_SUBi_V4>; + // Byte + defm : MemOpi_m5Pats <ldOpByte, truncstorei8, u6ExtPred, m5BImmPred, + ADDRriU6_0, MEMOPIMM_BYTE, MemOPb_SUBi_V4>; +} +let Predicates = [HasV4T, UseMEMOP] in { + defm : MemOpi_m5ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_m5ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_m5ExtType<extloadi8, extloadi16>; // any extend + + // Word + defm : MemOpi_m5Pats <load, store, u6_2ExtPred, m5ImmPred, + ADDRriU6_2, MEMOPIMM, MemOPw_SUBi_V4>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for bit operations on the memory. +// mem[bhw](Rs+#0) = [clrbit|setbit](#U5) +// mem[bhw](Rs+#u6) = [clrbit|setbit](#U5) +//===----------------------------------------------------------------------===// + +multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred, + PatLeaf extPred, ComplexPattern addrPred, + SDNodeXForm xformFunc, InstHexagon MI, SDNode OpNode> { + + // mem[bhw](Rs+#u6:[012]) = [clrbit|setbit](#U5) + let AddedComplexity = 250 in + def : Pat<(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + immPred:$bitend), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (xformFunc immPred:$bitend))>; + + // mem[bhw](Rs+#0) = [clrbit|setbit](#U5) + let AddedComplexity = 225 in + def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend), + addrPred:$addr), + (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>; +} + +multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Byte - clrbit + defm : MemOpi_bitPats<ldOpByte, truncstorei8, Clr3ImmPred, u6ExtPred, + ADDRriU6_0, CLRMEMIMM_BYTE, MemOPb_CLRBITi_V4, and>; + // Byte - setbit + defm : MemOpi_bitPats<ldOpByte, truncstorei8, Set3ImmPred, u6ExtPred, + ADDRriU6_0, SETMEMIMM_BYTE, MemOPb_SETBITi_V4, or>; + // Half Word - clrbit + defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Clr4ImmPred, u6_1ExtPred, + ADDRriU6_1, CLRMEMIMM_SHORT, MemOPh_CLRBITi_V4, and>; + // Half Word - setbit + defm : MemOpi_bitPats<ldOpHalf, truncstorei16, Set4ImmPred, u6_1ExtPred, + ADDRriU6_1, SETMEMIMM_SHORT, MemOPh_SETBITi_V4, or>; +} + +let Predicates = [HasV4T, UseMEMOP] in { + // mem[bh](Rs+#0) = [clrbit|setbit](#U5) + // mem[bh](Rs+#u6:[01]) = [clrbit|setbit](#U5) + defm : MemOpi_bitExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOpi_bitExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOpi_bitExtType<extloadi8, extloadi16>; // any extend + + // memw(Rs+#0) = [clrbit|setbit](#U5) + // memw(Rs+#u6:2) = [clrbit|setbit](#U5) + defm : MemOpi_bitPats<load, store, Clr5ImmPred, u6_2ExtPred, ADDRriU6_2, + CLRMEMIMM, MemOPw_CLRBITi_V4, and>; + defm : MemOpi_bitPats<load, store, Set5ImmPred, u6_2ExtPred, ADDRriU6_2, + SETMEMIMM, MemOPw_SETBITi_V4, or>; +} + +//===----------------------------------------------------------------------===// +// Multiclass to define 'def Pats' for ALU operations on the memory +// where addend is a register. +// mem[bhw](Rs+#0) [+-&|]= Rt +// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt +//===----------------------------------------------------------------------===// + +multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred, + PatLeaf extPred, InstHexagon MI, SDNode OpNode> { + let AddedComplexity = 141 in + // mem[bhw](Rs+#0) [+-&|]= Rt + def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)), + addrPred:$addr), + (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>; + + // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt + let AddedComplexity = 150 in + def : Pat <(stOp (OpNode (ldOp (add IntRegs:$base, extPred:$offset)), + (i32 IntRegs:$orend)), + (add IntRegs:$base, extPred:$offset)), + (MI IntRegs:$base, extPred:$offset, (i32 IntRegs:$orend) )>; +} + +multiclass MemOPr_ALUOp<PatFrag ldOp, PatFrag stOp, + ComplexPattern addrPred, PatLeaf extPred, + InstHexagon addMI, InstHexagon subMI, + InstHexagon andMI, InstHexagon orMI > { + + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, addMI, add>; + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, subMI, sub>; + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, andMI, and>; + defm : MemOpr_Pats <ldOp, stOp, addrPred, extPred, orMI, or>; +} + +multiclass MemOPr_ExtType<PatFrag ldOpByte, PatFrag ldOpHalf > { + // Half Word + defm : MemOPr_ALUOp <ldOpHalf, truncstorei16, ADDRriU6_1, u6_1ExtPred, + MemOPh_ADDr_V4, MemOPh_SUBr_V4, + MemOPh_ANDr_V4, MemOPh_ORr_V4>; + // Byte + defm : MemOPr_ALUOp <ldOpByte, truncstorei8, ADDRriU6_0, u6ExtPred, + MemOPb_ADDr_V4, MemOPb_SUBr_V4, + MemOPb_ANDr_V4, MemOPb_ORr_V4>; +} + +// Define 'def Pats' for MemOps with register addend. +let Predicates = [HasV4T, UseMEMOP] in { + // Byte, Half Word + defm : MemOPr_ExtType<zextloadi8, zextloadi16>; // zero extend + defm : MemOPr_ExtType<sextloadi8, sextloadi16>; // sign extend + defm : MemOPr_ExtType<extloadi8, extloadi16>; // any extend + // Word + defm : MemOPr_ALUOp <load, store, ADDRriU6_2, u6_2ExtPred, MemOPw_ADDr_V4, + MemOPw_SUBr_V4, MemOPw_ANDr_V4, MemOPw_ORr_V4 >; +} //===----------------------------------------------------------------------===// // XTYPE/PRED + @@ -3730,6 +2870,108 @@ def : Pat<(store (i64 DoubleRegs:$src1), (STrid_abs_V4 tglobaladdr: $absaddr, DoubleRegs: $src1)>; } +//===----------------------------------------------------------------------===// +// multiclass for store instructions with GP-relative addressing mode. +// mem[bhwd](#global)=Rt +// if ([!]Pv[.new]) mem[bhwd](##global) = Rt +//===----------------------------------------------------------------------===// +multiclass ST_GP<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp, isPredicable = 1 in + def NAME#_V4 : STInst2<(outs), + (ins globaladdress:$global, RC:$src), + mnemonic#"(#$global) = $src", + []>; + + // When GP-relative instructions are predicated, their addressing mode is + // changed to absolute and they are always constant extended. + let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, + isPredicated = 1 in { + defm Pt : ST_Abs_Pred <mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred <mnemonic, RC, 1>; + } +} + +let mayStore = 1, isNVStore = 1 in +multiclass ST_GP_nv<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp, isPredicable = 1 in + def NAME#_nv_V4 : NVInst_V4<(outs), + (ins u0AlwaysExt:$global, RC:$src), + mnemonic#"(#$global) = $src.new", + []>, + Requires<[HasV4T]>; + + // When GP-relative instructions are predicated, their addressing mode is + // changed to absolute and they are always constant extended. + let BaseOpcode = BaseOp, isExtended = 1, opExtendable = 1, + isPredicated = 1 in { + defm Pt : ST_Abs_Pred_nv<mnemonic, RC, 0>; + defm NotPt : ST_Abs_Pred_nv<mnemonic, RC, 1>; + } +} + +let validSubTargets = HasV4SubT, validSubTargets = HasV4SubT in { +defm STd_GP : ST_GP <"memd", "STd_GP", DoubleRegs>, + ST_GP_nv<"memd", "STd_GP", DoubleRegs>, NewValueRel ; +defm STb_GP : ST_GP<"memb", "STb_GP", IntRegs>, + ST_GP_nv<"memb", "STb_GP", IntRegs>, NewValueRel ; +defm STh_GP : ST_GP<"memh", "STh_GP", IntRegs>, + ST_GP_nv<"memh", "STh_GP", IntRegs>, NewValueRel ; +defm STw_GP : ST_GP<"memw", "STw_GP", IntRegs>, + ST_GP_nv<"memw", "STw_GP", IntRegs>, NewValueRel ; +} + +// 64 bit atomic store +def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global), + (i64 DoubleRegs:$src1)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>, + Requires<[HasV4T]>; + +// Map from store(globaladdress) -> memd(#foo) +let AddedComplexity = 100 in +def : Pat <(store (i64 DoubleRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>; + +// 8 bit atomic store +def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei8 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" +// to "r0 = 1; memw(#foo) = r0" +let AddedComplexity = 100 in +def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), + (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>; + +def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat<(truncstorei16 (i32 IntRegs:$src1), + (HexagonCONST32_GP tglobaladdr:$global)), + (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// 32 bit atomic store +def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global), + (i32 IntRegs:$src1)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +// Map from store(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)), + (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>; + +//===----------------------------------------------------------------------===// +// Multiclass for the load instructions with absolute addressing mode. +//===----------------------------------------------------------------------===// multiclass LD_Abs_Predbase<string mnemonic, RegisterClass RC, bit isNot, bit isPredNew> { let PNewValue = !if(isPredNew, "new", "") in @@ -3795,6 +3037,107 @@ let Predicates = [HasV4T], AddedComplexity=30 in def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))), (LDriuh_abs_V4 tglobaladdr:$absaddr)>; +//===----------------------------------------------------------------------===// +// multiclass for load instructions with GP-relative addressing mode. +// Rx=mem[bhwd](##global) +// if ([!]Pv[.new]) Rx=mem[bhwd](##global) +//===----------------------------------------------------------------------===// +let neverHasSideEffects = 1, validSubTargets = HasV4SubT in +multiclass LD_GP<string mnemonic, string BaseOp, RegisterClass RC> { + let BaseOpcode = BaseOp in { + let isPredicable = 1 in + def NAME#_V4 : LDInst2<(outs RC:$dst), + (ins globaladdress:$global), + "$dst = "#mnemonic#"(#$global)", + []>; + + let isExtended = 1, opExtendable = 2, isPredicated = 1 in { + defm Pt_V4 : LD_Abs_Pred<mnemonic, RC, 0>; + defm NotPt_V4 : LD_Abs_Pred<mnemonic, RC, 1>; + } + } +} + +defm LDd_GP : LD_GP<"memd", "LDd_GP", DoubleRegs>; +defm LDb_GP : LD_GP<"memb", "LDb_GP", IntRegs>; +defm LDub_GP : LD_GP<"memub", "LDub_GP", IntRegs>; +defm LDh_GP : LD_GP<"memh", "LDh_GP", IntRegs>; +defm LDuh_GP : LD_GP<"memuh", "LDuh_GP", IntRegs>; +defm LDw_GP : LD_GP<"memw", "LDw_GP", IntRegs>; + +def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)), + (i64 (LDd_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDw_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + +def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memw(#foo + 0) +let AddedComplexity = 100 in +def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i64 (LDd_GP_V4 tglobaladdr:$global))>; + +// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd +let AddedComplexity = 100 in +def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>; + +// When the Interprocedural Global Variable optimizer realizes that a certain +// global variable takes only two constant values, it shrinks the global to +// a boolean. Catch those loads here in the following 3 patterns. +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memb(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDb_GP_V4 tglobaladdr:$global))>; + +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memub(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDub_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memuh(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDuh_GP_V4 tglobaladdr:$global))>; + +// Map from load(globaladdress) -> memw(#foo) +let AddedComplexity = 100 in +def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))), + (i32 (LDw_GP_V4 tglobaladdr:$global))>; + + // Transfer global address into a register let AddedComplexity=50, isMoveImm = 1, isReMaterializable = 1 in def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1), diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h deleted file mode 100644 index e16636ea48..0000000000 --- a/lib/Target/Hexagon/HexagonMCInst.h +++ /dev/null @@ -1,41 +0,0 @@ -//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class extends MCInst to allow some VLIW annotation. -// -//===----------------------------------------------------------------------===// - -#ifndef HEXAGONMCINST_H -#define HEXAGONMCINST_H - -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCInst.h" - -namespace llvm { - class HexagonMCInst: public MCInst { - // Packet start and end markers - unsigned startPacket: 1, endPacket: 1; - const MachineInstr *MachineI; - public: - explicit HexagonMCInst(): MCInst(), - startPacket(0), endPacket(0) {} - - const MachineInstr* getMI() const { return MachineI; } - - void setMI(const MachineInstr *MI) { MachineI = MI; } - - bool isStartPacket() const { return (startPacket); } - bool isEndPacket() const { return (endPacket); } - - void setStartPacket(bool yes) { startPacket = yes; } - void setEndPacket(bool yes) { endPacket = yes; } - }; -} - -#endif diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index cd3d2898d0..5e80e48b01 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -220,7 +220,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, return false; } - unsigned cmpReg1, cmpOp2 = 0; // cmpOp2 assignment silences compiler warning. + unsigned cmpReg1, cmpOp2; cmpReg1 = MI->getOperand(1).getReg(); if (secondReg) { diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index f947dfcdf9..d8b4e2fcb3 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -14,25 +14,26 @@ #include "HexagonRegisterInfo.h" #include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" +#include "HexagonMachineFunctionInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/MC/MachineLocation.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -215,28 +216,41 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false,true); MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); } else if (TII.isMemOp(&MI)) { - unsigned resReg = HEXAGON_RESERVED_REG_1; - if (!MFI.hasVarSizedObjects() && - TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { - MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), - false, false, true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); - } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset); - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_rr), - resReg).addReg(FrameReg).addReg(resReg); - MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, - true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + // use the constant extender if the instruction provides it + // and we are V4TOps. + if (Subtarget.hasV4TOps()) { + if (TII.isConstExtended(&MI)) { + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); + TII.immediateExtend(&MI); + } else { + llvm_unreachable("Need to implement for memops"); + } } else { - BuildMI(*MI.getParent(), II, MI.getDebugLoc(), - TII.get(Hexagon::ADD_ri), - resReg).addReg(FrameReg).addImm(Offset); - MI.getOperand(FIOperandNum).ChangeToRegister(resReg, false, false, - true); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + // Only V3 and older instructions here. + unsigned ResReg = HEXAGON_RESERVED_REG_1; + if (!MFI.hasVarSizedObjects() && + TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) { + MI.getOperand(FIOperandNum).ChangeToRegister(getStackRegister(), + false, false, false); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(FrameSize+Offset); + } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::CONST32_Int_Real), ResReg).addImm(Offset); + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_rr), ResReg).addReg(FrameReg). + addReg(ResReg); + MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + } else { + BuildMI(*MI.getParent(), II, MI.getDebugLoc(), + TII.get(Hexagon::ADD_ri), ResReg).addReg(FrameReg). + addImm(Offset); + MI.getOperand(FIOperandNum).ChangeToRegister(ResReg, false, false, + true); + MI.getOperand(FIOperandNum+1).ChangeToImmediate(0); + } } } else { unsigned dstReg = MI.getOperand(0).getReg(); diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 4bacb8fa67..07d5ce1d8a 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -29,8 +29,16 @@ EnableV3("enable-hexagon-v3", cl::Hidden, static cl::opt<bool> EnableMemOps( "enable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, - cl::desc("Generate V4 memop instructions.")); + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc( + "Generate V4 MEMOP in code generation for Hexagon target")); + +static cl::opt<bool> +DisableMemOps( + "disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc( + "Do not generate V4 MEMOP in code generation for Hexagon target")); static cl::opt<bool> EnableIEEERndNear( @@ -64,7 +72,10 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); - if (EnableMemOps) + // UseMemOps on by default unless disabled explicitly + if (DisableMemOps) + UseMemOps = false; + else if (EnableMemOps) UseMemOps = true; else UseMemOps = false; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index d9fef3e455..ce45c626f7 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -35,6 +35,10 @@ opt<bool> DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling")); +static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon CFG Optimization")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get @@ -75,19 +79,20 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, TSInfo(*this), FrameLowering(Subtarget), InstrItins(&Subtarget.getInstrItineraryData()) { - setMCUseCFI(false); + setMCUseCFI(false); } // addPassesForOptimizations - Allow the backend (target) to add Target // Independent Optimization passes to the Pass Manager. bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { - - PM.add(createConstantPropagationPass()); - PM.add(createLoopSimplifyPass()); - PM.add(createDeadCodeEliminationPass()); - PM.add(createConstantPropagationPass()); - PM.add(createLoopUnrollPass()); - PM.add(createLoopStrengthReducePass()); + if (getOptLevel() != CodeGenOpt::None) { + PM.add(createConstantPropagationPass()); + PM.add(createLoopSimplifyPass()); + PM.add(createDeadCodeEliminationPass()); + PM.add(createConstantPropagationPass()); + PM.add(createLoopUnrollPass()); + PM.add(createLoopStrengthReducePass()); + } return true; } @@ -121,38 +126,45 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { } bool HexagonPassConfig::addInstSelector() { - addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonRemoveExtendOps(getHexagonTargetMachine())); + addPass(createHexagonISelDag(getHexagonTargetMachine(), getOptLevel())); - addPass(createHexagonPeephole()); + + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonPeephole()); + return false; } bool HexagonPassConfig::addPreRegAlloc() { - if (!DisableHardwareLoops) { + if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) addPass(createHexagonHardwareLoops()); - } return false; } bool HexagonPassConfig::addPostRegAlloc() { - addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); + if (!DisableHexagonCFGOpt && getOptLevel() != CodeGenOpt::None) + addPass(createHexagonCFGOptimizer(getHexagonTargetMachine())); return true; } bool HexagonPassConfig::addPreSched2() { - addPass(&IfConverterID); + if (getOptLevel() != CodeGenOpt::None) + addPass(&IfConverterID); return true; } bool HexagonPassConfig::addPreEmitPass() { - if (!DisableHardwareLoops) { + if (!DisableHardwareLoops && getOptLevel() != CodeGenOpt::None) addPass(createHexagonFixupHwLoops()); - } - addPass(createHexagonNewValueJump()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonNewValueJump()); // Expand Spill code for predicate registers. addPass(createHexagonExpandPredSpillCode(getHexagonTargetMachine())); @@ -161,7 +173,8 @@ bool HexagonPassConfig::addPreEmitPass() { addPass(createHexagonSplitTFRCondSets(getHexagonTargetMachine())); // Create Packets. - addPass(createHexagonPacketizer()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createHexagonPacketizer()); return false; } diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 866beb1688..c0d86da1c0 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -149,7 +149,6 @@ namespace { bool canReserveResourcesForConstExt(MachineInstr *MI); void reserveResourcesForConstExt(MachineInstr* MI); bool isNewValueInst(MachineInstr* MI); - bool isDotNewInst(MachineInstr* MI); }; } @@ -2154,172 +2153,6 @@ static bool GetPredicateSense(MachineInstr* MI, return false; } -bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if (QII->isNewValueInst(MI)) - return true; - - switch (MI->getOpcode()) { - case Hexagon::TFR_cdnNotPt: - case Hexagon::TFR_cdnPt: - case Hexagon::TFRI_cdnNotPt: - case Hexagon::TFRI_cdnPt: - case Hexagon::LDrid_cdnPt : - case Hexagon::LDrid_cdnNotPt : - case Hexagon::LDrid_indexed_cdnPt : - case Hexagon::LDrid_indexed_cdnNotPt : - case Hexagon::POST_LDrid_cdnPt_V4 : - case Hexagon::POST_LDrid_cdnNotPt_V4 : - case Hexagon::LDriw_cdnPt : - case Hexagon::LDriw_cdnNotPt : - case Hexagon::LDriw_indexed_cdnPt : - case Hexagon::LDriw_indexed_cdnNotPt : - case Hexagon::POST_LDriw_cdnPt_V4 : - case Hexagon::POST_LDriw_cdnNotPt_V4 : - case Hexagon::LDrih_cdnPt : - case Hexagon::LDrih_cdnNotPt : - case Hexagon::LDrih_indexed_cdnPt : - case Hexagon::LDrih_indexed_cdnNotPt : - case Hexagon::POST_LDrih_cdnPt_V4 : - case Hexagon::POST_LDrih_cdnNotPt_V4 : - case Hexagon::LDrib_cdnPt : - case Hexagon::LDrib_cdnNotPt : - case Hexagon::LDrib_indexed_cdnPt : - case Hexagon::LDrib_indexed_cdnNotPt : - case Hexagon::POST_LDrib_cdnPt_V4 : - case Hexagon::POST_LDrib_cdnNotPt_V4 : - case Hexagon::LDriuh_cdnPt : - case Hexagon::LDriuh_cdnNotPt : - case Hexagon::LDriuh_indexed_cdnPt : - case Hexagon::LDriuh_indexed_cdnNotPt : - case Hexagon::POST_LDriuh_cdnPt_V4 : - case Hexagon::POST_LDriuh_cdnNotPt_V4 : - case Hexagon::LDriub_cdnPt : - case Hexagon::LDriub_cdnNotPt : - case Hexagon::LDriub_indexed_cdnPt : - case Hexagon::LDriub_indexed_cdnNotPt : - case Hexagon::POST_LDriub_cdnPt_V4 : - case Hexagon::POST_LDriub_cdnNotPt_V4 : - - case Hexagon::LDrid_indexed_shl_cdnPt_V4 : - case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrib_indexed_shl_cdnPt_V4 : - case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriub_indexed_shl_cdnPt_V4 : - case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDrih_indexed_shl_cdnPt_V4 : - case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriuh_indexed_shl_cdnPt_V4 : - case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 : - case Hexagon::LDriw_indexed_shl_cdnPt_V4 : - case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 : - -// Coditional add - case Hexagon::ADD_ri_cdnPt: - case Hexagon::ADD_ri_cdnNotPt: - case Hexagon::ADD_rr_cdnPt: - case Hexagon::ADD_rr_cdnNotPt: - - // Conditional logical operations - case Hexagon::XOR_rr_cdnPt : - case Hexagon::XOR_rr_cdnNotPt : - case Hexagon::AND_rr_cdnPt : - case Hexagon::AND_rr_cdnNotPt : - case Hexagon::OR_rr_cdnPt : - case Hexagon::OR_rr_cdnNotPt : - - // Conditonal subtract - case Hexagon::SUB_rr_cdnPt : - case Hexagon::SUB_rr_cdnNotPt : - - // Conditional combine - case Hexagon::COMBINE_rr_cdnPt : - case Hexagon::COMBINE_rr_cdnNotPt : - - // Conditional shift operations - case Hexagon::ASLH_cdnPt_V4: - case Hexagon::ASLH_cdnNotPt_V4: - case Hexagon::ASRH_cdnPt_V4: - case Hexagon::ASRH_cdnNotPt_V4: - case Hexagon::SXTB_cdnPt_V4: - case Hexagon::SXTB_cdnNotPt_V4: - case Hexagon::SXTH_cdnPt_V4: - case Hexagon::SXTH_cdnNotPt_V4: - case Hexagon::ZXTB_cdnPt_V4: - case Hexagon::ZXTB_cdnNotPt_V4: - case Hexagon::ZXTH_cdnPt_V4: - case Hexagon::ZXTH_cdnNotPt_V4: - - // Conditional stores - case Hexagon::STrib_imm_cdnPt_V4 : - case Hexagon::STrib_imm_cdnNotPt_V4 : - case Hexagon::STrib_cdnPt_V4 : - case Hexagon::STrib_cdnNotPt_V4 : - case Hexagon::STrib_indexed_cdnPt_V4 : - case Hexagon::STrib_indexed_cdnNotPt_V4 : - case Hexagon::POST_STbri_cdnPt_V4 : - case Hexagon::POST_STbri_cdnNotPt_V4 : - case Hexagon::STrib_indexed_shl_cdnPt_V4 : - case Hexagon::STrib_indexed_shl_cdnNotPt_V4 : - - // Store doubleword conditionally - case Hexagon::STrid_indexed_cdnPt_V4 : - case Hexagon::STrid_indexed_cdnNotPt_V4 : - case Hexagon::STrid_indexed_shl_cdnPt_V4 : - case Hexagon::STrid_indexed_shl_cdnNotPt_V4 : - case Hexagon::POST_STdri_cdnPt_V4 : - case Hexagon::POST_STdri_cdnNotPt_V4 : - - // Store halfword conditionally - case Hexagon::STrih_cdnPt_V4 : - case Hexagon::STrih_cdnNotPt_V4 : - case Hexagon::STrih_indexed_cdnPt_V4 : - case Hexagon::STrih_indexed_cdnNotPt_V4 : - case Hexagon::STrih_imm_cdnPt_V4 : - case Hexagon::STrih_imm_cdnNotPt_V4 : - case Hexagon::STrih_indexed_shl_cdnPt_V4 : - case Hexagon::STrih_indexed_shl_cdnNotPt_V4 : - case Hexagon::POST_SThri_cdnPt_V4 : - case Hexagon::POST_SThri_cdnNotPt_V4 : - - // Store word conditionally - case Hexagon::STriw_cdnPt_V4 : - case Hexagon::STriw_cdnNotPt_V4 : - case Hexagon::STriw_indexed_cdnPt_V4 : - case Hexagon::STriw_indexed_cdnNotPt_V4 : - case Hexagon::STriw_imm_cdnPt_V4 : - case Hexagon::STriw_imm_cdnNotPt_V4 : - case Hexagon::STriw_indexed_shl_cdnPt_V4 : - case Hexagon::STriw_indexed_shl_cdnNotPt_V4 : - case Hexagon::POST_STwri_cdnPt_V4 : - case Hexagon::POST_STwri_cdnNotPt_V4 : - - case Hexagon::LDd_GP_cdnPt_V4: - case Hexagon::LDd_GP_cdnNotPt_V4: - case Hexagon::LDb_GP_cdnPt_V4: - case Hexagon::LDb_GP_cdnNotPt_V4: - case Hexagon::LDub_GP_cdnPt_V4: - case Hexagon::LDub_GP_cdnNotPt_V4: - case Hexagon::LDh_GP_cdnPt_V4: - case Hexagon::LDh_GP_cdnNotPt_V4: - case Hexagon::LDuh_GP_cdnPt_V4: - case Hexagon::LDuh_GP_cdnNotPt_V4: - case Hexagon::LDw_GP_cdnPt_V4: - case Hexagon::LDw_GP_cdnNotPt_V4: - - case Hexagon::STd_GP_cdnPt_V4: - case Hexagon::STd_GP_cdnNotPt_V4: - case Hexagon::STb_GP_cdnPt_V4: - case Hexagon::STb_GP_cdnNotPt_V4: - case Hexagon::STh_GP_cdnPt_V4: - case Hexagon::STh_GP_cdnNotPt_V4: - case Hexagon::STw_GP_cdnPt_V4: - case Hexagon::STw_GP_cdnNotPt_V4: - return true; - } - return false; -} - static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, const HexagonInstrInfo *QII) { assert(QII->isPostIncrement(MI) && "Not a post increment operation."); @@ -2490,7 +2323,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI, // sense, i.e, either both should be negated or both should be none negated. if (( predRegNumDst != predRegNumSrc) || - isDotNewInst(PacketMI) != isDotNewInst(MI) || + QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) || GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) { return false; } @@ -2600,8 +2433,9 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI, MachineBasicBlock::iterator &MII, const TargetRegisterClass* RC ) { - // already a dot new instruction - if (isDotNewInst(MI) && !IsNewifyStore(MI)) + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; + // Already a dot new instruction. + if (QII->isDotNewInst(MI) && !IsNewifyStore(MI)) return false; if (!isNewifiable(MI)) @@ -2616,7 +2450,6 @@ bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI, else { // Create a dot new machine instruction to see if resources can be // allocated. If not, bail out now. - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; int NewOpcode = GetDotNewOp(MI->getOpcode()); const MCInstrDesc &desc = QII->get(NewOpcode); DebugLoc dl; @@ -2759,7 +2592,7 @@ bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1, // !p0 is not complimentary to p0.new return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) && (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) && - (isDotNewInst(MI1) == isDotNewInst(MI2))); + (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); } // initPacketizerState - Initialize packetizer flags diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index 86f75d1c2d..3deb8d1deb 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -31,6 +31,7 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) { AscizDirective = "\t.string\t"; WeakRefDirective = "\t.weak\t"; + SupportsDebugInformation = true; UsesELFSectionDirectiveForBSS = true; ExceptionsType = ExceptionHandling::DwarfCFI; } diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp index ad495ff306..dda6e247ac 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp @@ -82,29 +82,35 @@ struct MBlazeOperand : public MCParsedAsmOperand { SMLoc StartLoc, EndLoc; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + unsigned OffReg; + const MCExpr *Off; + }; + + struct FslImmOp { + const MCExpr *Val; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - unsigned OffReg; - const MCExpr *Off; - } Mem; - - struct { - const MCExpr *Val; - } FslImm; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; + struct FslImmOp FslImm; }; MBlazeOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index d0fd7dcec1..bd83afc1cc 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -122,7 +122,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, } void MBlazeRegisterInfo:: -processFunctionBeforeFrameFinalized(MachineFunction &MF) const { +processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *) const { // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 99a2fac95c..497f3866c9 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -55,7 +55,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index ae2e55617d..e504011dfd 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -285,8 +285,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, } void -MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index a077dd7351..c673f59b5e 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -50,7 +50,8 @@ public: bool hasFP(const MachineFunction &MF) const; bool hasReservedCallFrame(const MachineFunction &MF) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; }; } // End llvm namespace diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index ade6084752..c403f216b0 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -101,6 +101,9 @@ class MipsAsmParser : public MCTargetAsmParser { MipsAsmParser::OperandMatchResultTy parseCCRRegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterClass); + bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); @@ -119,6 +122,9 @@ class MipsAsmParser : public MCTargetAsmParser { SmallVectorImpl<MCInst> &Instructions); void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res); @@ -133,6 +139,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetReorderDirective(); bool parseSetNoReorderDirective(); + bool parseSetAssignment(); + bool parseDirectiveWord(unsigned Size, SMLoc L); MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); @@ -166,6 +174,9 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getReg(int RC,int RegNo); int getATReg(); + + bool processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions); public: MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser) : MCTargetAsmParser(), STI(sti), Parser(parser) { @@ -211,25 +222,30 @@ private: MipsOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + struct Token { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNum; + RegisterKind Kind; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned Base; + const MCExpr *Off; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNum; - RegisterKind Kind; - } Reg; - - struct { - const MCExpr *Val; - } Imm; - - struct { - unsigned Base; - const MCExpr *Off; - } Mem; + struct Token Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; SMLoc StartLoc, EndLoc; @@ -385,6 +401,56 @@ public: }; } +namespace llvm { +extern const MCInstrDesc MipsInsts[]; +} +static const MCInstrDesc &getInstDesc(unsigned Opcode) { + return MipsInsts[Opcode]; +} + +bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + Inst.setLoc(IDLoc); + if (MCID.mayLoad() || MCID.mayStore()) { + // Check the offset of memory operand, if it is a symbol + // reference or immediate we may have to expand instructions + for (unsigned i=0;i<MCID.getNumOperands();i++) { + const MCOperandInfo &OpInfo = MCID.OpInfo[i]; + if ((OpInfo.OperandType == MCOI::OPERAND_MEMORY) || + (OpInfo.OperandType == MCOI::OPERAND_UNKNOWN)) { + MCOperand &Op = Inst.getOperand(i); + if (Op.isImm()) { + int MemOffset = Op.getImm(); + if (MemOffset < -32768 || MemOffset > 32767) { + // Offset can't exceed 16bit value + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),true); + return false; + } + } else if (Op.isExpr()) { + const MCExpr *Expr = Op.getExpr(); + if (Expr->getKind() == MCExpr::SymbolRef){ + const MCSymbolRefExpr *SR = + static_cast<const MCSymbolRefExpr*>(Expr); + if (SR->getKind() == MCSymbolRefExpr::VK_None) { + // Expand symbol + expandMemInst(Inst,IDLoc,Instructions,MCID.mayLoad(),false); + return false; + } + } + } + } + } + } + + if (needsExpansion(Inst)) + expandInstruction(Inst, IDLoc, Instructions); + else + Instructions.push_back(Inst); + + return false; +} + bool MipsAsmParser::needsExpansion(MCInst &Inst) { switch(Inst.getOpcode()) { @@ -531,28 +597,103 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, } } +void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions, + bool isLoad,bool isImmOpnd) { + const MCSymbolRefExpr *SR; + MCInst TempInst; + unsigned ImmOffset,HiOffset,LoOffset; + const MCExpr *ExprOffset; + unsigned TmpRegNum; + unsigned AtRegNum = getReg((isMips64()) ? Mips::CPU64RegsRegClassID: + Mips::CPURegsRegClassID, + getATReg()); + // 1st operand is either source or dst register + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + unsigned RegOpNum = Inst.getOperand(0).getReg(); + // 2nd operand is base register + assert(Inst.getOperand(1).isReg() && "expected register operand kind"); + unsigned BaseRegNum = Inst.getOperand(1).getReg(); + // 3rd operand is either immediate or expression + if (isImmOpnd) { + assert(Inst.getOperand(2).isImm() && "expected immediate operand kind"); + ImmOffset = Inst.getOperand(2).getImm(); + LoOffset = ImmOffset & 0x0000ffff; + HiOffset = (ImmOffset & 0xffff0000) >> 16; + // If msb of LoOffset is 1(negative number) we must increment HiOffset + if (LoOffset & 0x8000) + HiOffset++; + } + else + ExprOffset = Inst.getOperand(2).getExpr(); + // All instructions will have the same location + TempInst.setLoc(IDLoc); + // 1st instruction in expansion is LUi. For load instruction we can use + // the dst register as a temporary if base and dst are different, + // but for stores we must use $at + TmpRegNum = (isLoad && (BaseRegNum != RegOpNum))?RegOpNum:AtRegNum; + TempInst.setOpcode(Mips::LUi); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(HiOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + SR = static_cast<const MCSymbolRefExpr*>(ExprOffset); + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_HI, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(HiExpr)); + } + } + // Add the instruction to the list + Instructions.push_back(TempInst); + // and prepare TempInst for next instruction + TempInst.clear(); + // which is add temp register to base + TempInst.setOpcode(Mips::ADDu); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + TempInst.addOperand(MCOperand::CreateReg(BaseRegNum)); + Instructions.push_back(TempInst); + TempInst.clear(); + // and finaly, create original instruction with low part + // of offset and new base + TempInst.setOpcode(Inst.getOpcode()); + TempInst.addOperand(MCOperand::CreateReg(RegOpNum)); + TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); + if (isImmOpnd) + TempInst.addOperand(MCOperand::CreateImm(LoOffset)); + else { + if (ExprOffset->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr:: + Create(SR->getSymbol().getName(), + MCSymbolRefExpr::VK_Mips_ABS_LO, + getContext()); + TempInst.addOperand(MCOperand::CreateExpr(LoExpr)); + } + } + Instructions.push_back(TempInst); + TempInst.clear(); +} + bool MipsAsmParser:: MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out, unsigned &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; + SmallVector<MCInst, 8> Instructions; unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { default: break; case Match_Success: { - if (needsExpansion(Inst)) { - SmallVector<MCInst, 4> Instructions; - expandInstruction(Inst, IDLoc, Instructions); - for(unsigned i =0; i < Instructions.size(); i++){ - Out.EmitInstruction(Instructions[i]); - } - } else { - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst); - } + if (processInstruction(Inst,IDLoc,Instructions)) + return true; + for(unsigned i =0; i < Instructions.size(); i++) + Out.EmitInstruction(Instructions[i]); return false; } case Match_MissingFeature: @@ -812,6 +953,11 @@ bool MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*>&Operands, return false; } case AsmToken::Identifier: + // Look for the existing symbol, we should check if + // we need to assigne the propper RegisterKind + if (searchSymbolAlias(Operands,MipsOperand::Kind_None)) + return false; + //else drop to expression parsing case AsmToken::LParen: case AsmToken::Minus: case AsmToken::Plus: @@ -883,24 +1029,25 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { // Check the type of the expression if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal)) { - // it's a constant, evaluate lo or hi value - int Val = MCE->getValue(); + // It's a constant, evaluate lo or hi value if (Str == "lo") { - Val = Val & 0xffff; + short Val = MCE->getValue(); + Res = MCConstantExpr::Create(Val, getContext()); } else if (Str == "hi") { + int Val = MCE->getValue(); int LoSign = Val & 0x8000; Val = (Val & 0xffff0000) >> 16; - //lower part is treated as signed int, so if it is negative - //we must add 1 to hi part to compensate + // Lower part is treated as a signed int, so if it is negative + // we must add 1 to the hi part to compensate if (LoSign) Val++; + Res = MCConstantExpr::Create(Val, getContext()); } - Res = MCConstantExpr::Create(Val, getContext()); return false; } if (const MCSymbolRefExpr *MSRE = dyn_cast<MCSymbolRefExpr>(IdVal)) { - // it's a symbol, create symbolic expression from symbol + // It's a symbol, create symbolic expression from symbol StringRef Symbol = MSRE->getSymbol().getName(); MCSymbolRefExpr::VariantKind VK = getVariantKind(Str); Res = MCSymbolRefExpr::Create(Symbol,VK,getContext()); @@ -925,6 +1072,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res) { switch(getLexer().getKind()) { default: return true; + case AsmToken::Identifier: case AsmToken::Integer: case AsmToken::Minus: case AsmToken::Plus: @@ -1004,6 +1152,11 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (!isMips64()) return MatchOperand_NoMatch; + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPU64Regs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1018,9 +1171,52 @@ MipsAsmParser::parseCPU64Regs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_NoMatch; } +bool MipsAsmParser:: +searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand*> &Operands, + unsigned RegisterKind) { + + MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); + if (Sym) { + SMLoc S = Parser.getTok().getLoc(); + const MCExpr *Expr; + if (Sym->isVariable()) + Expr = Sym->getVariableValue(); + else + return false; + if (Expr->getKind() == MCExpr::SymbolRef) { + const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr); + const StringRef DefSymbol = Ref->getSymbol().getName(); + if (DefSymbol.startswith("$")) { + // Lookup for the register with corresponding name + int RegNum = matchRegisterName(DefSymbol.substr(1),isMips64()); + if (RegNum > -1) { + Parser.Lex(); + MipsOperand *op = MipsOperand::CreateReg(RegNum,S, + Parser.getTok().getLoc()); + op->setRegKind((MipsOperand::RegisterKind)RegisterKind); + Operands.push_back(op); + return true; + } + } + } else if (Expr->getKind() == MCExpr::Constant) { + Parser.Lex(); + const MCConstantExpr *Const = static_cast<const MCConstantExpr*>(Expr); + MipsOperand *op = MipsOperand::CreateImm(Const,S, + Parser.getTok().getLoc()); + Operands.push_back(op); + return true; + } + } + return false; +} MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseCPURegs(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + if (getLexer().getKind() == AsmToken::Identifier) { + if (searchSymbolAlias(Operands,MipsOperand::Kind_CPURegs)) + return MatchOperand_Success; + return MatchOperand_NoMatch; + } // if the first token is not '$' we have an error if (Parser.getTok().isNot(AsmToken::Dollar)) return MatchOperand_NoMatch; @@ -1316,13 +1512,13 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { } bool MipsAsmParser::parseSetNoAtDirective() { - // line should look like: + // Line should look like: // .set noat // set at reg to 0 Options.setATReg(0); // eat noat Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1341,12 +1537,12 @@ bool MipsAsmParser::parseSetAtDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } else if (getLexer().is(AsmToken::Equal)) { - getParser().Lex(); //eat '=' + getParser().Lex(); // eat '=' if (getLexer().isNot(AsmToken::Dollar)) { reportParseError("unexpected token in statement"); return false; } - Parser.Lex(); // eat '$' + Parser.Lex(); // Eat '$' const AsmToken &Reg = Parser.getTok(); if (Reg.is(AsmToken::Identifier)) { AtRegNo = matchCPURegisterName(Reg.getIdentifier()); @@ -1366,7 +1562,7 @@ bool MipsAsmParser::parseSetAtDirective() { reportParseError("unexpected token in statement"); return false; } - getParser().Lex(); //eat reg + getParser().Lex(); // Eat reg if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); @@ -1382,7 +1578,7 @@ bool MipsAsmParser::parseSetAtDirective() { bool MipsAsmParser::parseSetReorderDirective() { Parser.Lex(); - // if this is not the end of the statement, report error + // If this is not the end of the statement, report error if (getLexer().isNot(AsmToken::EndOfStatement)) { reportParseError("unexpected token in statement"); return false; @@ -1431,6 +1627,31 @@ bool MipsAsmParser::parseSetNoMacroDirective() { Parser.Lex(); // Consume the EndOfStatement return false; } + +bool MipsAsmParser::parseSetAssignment() { + StringRef Name; + const MCExpr *Value; + + if (Parser.parseIdentifier(Name)) + reportParseError("expected identifier after .set"); + + if (getLexer().isNot(AsmToken::Comma)) + return reportParseError("unexpected token in .set directive"); + Lex(); //eat comma + + if (Parser.parseExpression(Value)) + reportParseError("expected valid expression after comma"); + + // check if the Name already exists as a symbol + MCSymbol *Sym = getContext().LookupSymbol(Name); + if (Sym) { + return reportParseError("symbol already defined"); + } + Sym = getContext().GetOrCreateSymbol(Name); + Sym->setVariableValue(Value); + + return false; +} bool MipsAsmParser::parseDirectiveSet() { // get next token @@ -1456,6 +1677,10 @@ bool MipsAsmParser::parseDirectiveSet() { // ignore this directive for now Parser.eatToEndOfStatement(); return false; + } else { + // it is just an identifier, look for assignment + parseSetAssignment(); + return false; } return true; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 58aa1be34d..cf8bb189e4 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -16,6 +16,8 @@ add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen Mips16FrameLowering.cpp Mips16InstrInfo.cpp + Mips16ISelDAGToDAG.cpp + Mips16ISelLowering.cpp Mips16RegisterInfo.cpp MipsAnalyzeImmediate.cpp MipsAsmPrinter.cpp @@ -33,6 +35,8 @@ add_llvm_target(MipsCodeGen MipsRegisterInfo.cpp MipsSEFrameLowering.cpp MipsSEInstrInfo.cpp + MipsSEISelDAGToDAG.cpp + MipsSEISelLowering.cpp MipsSERegisterInfo.cpp MipsSubtarget.cpp MipsTargetMachine.cpp diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 025a783f93..59e49d8ddc 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -138,10 +138,10 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, @@ -484,14 +484,14 @@ static DecodeStatus DecodeHWRegs64RegisterClass(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeACRegsRegisterClass(MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeACRegsDSPRegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { if (RegNo >= 4) return MCDisassembler::Fail; - unsigned Reg = getReg(Decoder, Mips::ACRegsRegClassID, RegNo); + unsigned Reg = getReg(Decoder, Mips::ACRegsDSPRegClassID, RegNo); Inst.addOperand(MCOperand::CreateReg(Reg)); return MCDisassembler::Success; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 96f93a0789..e198a7c983 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -160,8 +160,9 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions or immediates"); @@ -179,8 +180,9 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) return MO.getImm(); + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm()>>2; + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions or an immediate"); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 25f4ffb929..54fdb78714 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -20,7 +20,7 @@ namespace llvm { class Mips16FrameLowering : public MipsFrameLowering { public: explicit Mips16FrameLowering(const MipsSubtarget &STI) - : MipsFrameLowering(STI) {} + : MipsFrameLowering(STI, 8) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp new file mode 100644 index 0000000000..00b3449300 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -0,0 +1,308 @@ +//===-- Mips16ISelDAGToDAG.cpp - A Dag to Dag Inst Selector for Mips16 ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "Mips16ISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = Mips::Mflo16; + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = Mips::Mfhi16; + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC = + (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + V2 = RegInfo.createVirtualRegister(RC); + + BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) + .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); + BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); + BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) + .addReg(V1).addReg(V2); +} + +// Insert instructions to initialize the Mips16 SP Alias register in the +// first MBB of the function. +// +void Mips16DAGToDAGISel::initMips16SPAliasReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->mips16SPAliasRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); + + BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) + .addReg(Mips::SP); +} + +void Mips16DAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + initMips16SPAliasReg(MF); +} + +/// getMips16SPAliasReg - Output the instructions required to put the +/// SP into a Mips16 accessible aliased register. +SDValue Mips16DAGToDAGISel::getMips16SPAliasReg() { + unsigned Mips16SPAliasReg = + MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); + return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); +} + +void Mips16DAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { + SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); + if (Parent) { + switch (Parent->getOpcode()) { + case ISD::LOAD: { + LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + case ISD::STORE: { + StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); + switch (SD->getMemoryVT().getSizeInBits()) { + case 8: + case 16: + AliasReg = TM.getFrameLowering()->hasFP(*MF)? + AliasFPReg: getMips16SPAliasReg(); + return; + } + break; + } + } + } + AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); + return; + +} + +bool Mips16DAGToDAGISel::selectAddr16( + SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, + SDValue &Alias) { + EVT ValTy = Addr.getValueType(); + + Alias = CurDAG->getTargetConstant(0, ValTy); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + getMips16SPRefReg(Parent, Alias); + return true; + } + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + getMips16SPRefReg(Parent, Alias); + } + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + + // If an indexed floating point load/store can be emitted, return false. + const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); + + if (LS && + (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && + Subtarget.hasFPIdx()) + return false; + } + Base = Addr; + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; +} + +/// Select instructions not customized! Used for +/// expanded, promoted and normal instructions +std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2), CmpLHS; + unsigned Opc = InFlag.getOpcode(); (void)Opc; + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + unsigned MOp; + if (Opcode == ISD::ADDE) { + CmpLHS = InFlag.getValue(0); + MOp = Mips::AdduRxRyRz16; + } else { + CmpLHS = InFlag.getOperand(0); + MOp = Mips::SubuRxRyRz16; + } + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + + EVT VT = LHS.getValueType(); + + unsigned Sltu_op = Mips::SltuRxRyRz16; + SDNode *Carry = CurDAG->getMachineNode(Sltu_op, DL, VT, Ops, 2); + unsigned Addu_op = Mips::AdduRxRyRz16; + SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, DL, VT, + SDValue(Carry,0), RHS); + + SDNode *Result = CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry,0)); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : Mips::MultRxRy16); + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + case ISD::MULHS: + case ISD::MULHU: { + MultOpc = (Opcode == ISD::MULHU ? Mips::MultuRxRy16 : Mips::MultRxRy16); + SDNode *Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) { + return new Mips16DAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h new file mode 100644 index 0000000000..baa85877d9 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -0,0 +1,51 @@ +//===---- Mips16ISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPS16ISELDAGTODAG_H +#define MIPS16ISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class Mips16DAGToDAGISel : public MipsDAGToDAGISel { +public: + explicit Mips16DAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, + EVT Ty, bool HasLo, bool HasHi); + + SDValue getMips16SPAliasReg(); + + void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); + + void initMips16SPAliasReg(MachineFunction &MF); +}; + +FunctionPass *createMips16ISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp new file mode 100644 index 0000000000..23eb5375ac --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -0,0 +1,689 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-lower" +#include "Mips16ISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <set> + +using namespace llvm; + +static cl::opt<bool> +Mips16HardFloat("mips16-hard-float", cl::NotHidden, + cl::desc("MIPS: mips16 hard float enable."), + cl::init(false)); + +static cl::opt<bool> DontExpandCondPseudos16( + "mips16-dont-expand-cond-pseudo", + cl::init(false), + cl::desc("Dont expand conditional move related " + "pseudos for Mips 16"), + cl::Hidden); + +namespace { + std::set<const char*, MipsTargetLowering::LTStr> NoHelperNeeded; +} + +Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // + // set up as if mips32 and then revert so we can test the mechanism + // for switching + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + computeRegisterProperties(); + clearRegisterClasses(); + + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); + + if (Mips16HardFloat) + setMips16HardFloatLibCalls(); + + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMips16TargetLowering(MipsTargetMachine &TM) { + return new Mips16TargetLowering(TM); +} + +bool +Mips16TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + return false; +} + +MachineBasicBlock * +Mips16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::SelBeqZ: + return emitSel16(Mips::BeqzRxImm16, MI, BB); + case Mips::SelBneZ: + return emitSel16(Mips::BnezRxImm16, MI, BB); + case Mips::SelTBteqZCmpi: + return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBteqZSlti: + return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBteqZSltiu: + return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBtneZCmpi: + return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); + case Mips::SelTBtneZSlti: + return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); + case Mips::SelTBtneZSltiu: + return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SelTBteqZCmp: + return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBteqZSlt: + return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBteqZSltu: + return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::SelTBtneZCmp: + return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::SelTBtneZSlt: + return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::SelTBtneZSltu: + return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); + case Mips::BteqzT8SltX16: + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); + case Mips::BteqzT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); + case Mips::BtnezT8CmpX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); + case Mips::BtnezT8SltX16: + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); + case Mips::BtnezT8SltuX16: + // TBD: figure out a way to get this or remove the instruction + // altogether. + return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); + case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); + case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( + Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + break; + case Mips::SltCCRxRy16: + return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); + break; + case Mips::SltiCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); + case Mips::SltiuCCRxImmX16: + return emitFEXT_CCRXI16_ins + (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); + case Mips::SltuCCRxRy16: + return emitFEXT_CCRX16_ins + (Mips::SltuRxRy16, MI, BB); + } +} + +bool Mips16TargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + // No tail call optimization for mips16. + return false; +} + +void Mips16TargetLowering::setMips16LibcallName + (RTLIB::Libcall L, const char *Name) { + setLibcallName(L, Name); + NoHelperNeeded.insert(Name); +} + +void Mips16TargetLowering::setMips16HardFloatLibCalls() { + setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); + setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); + setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); + setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); + setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); + setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); + setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); + setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); + setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); + setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); + setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); + setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); + setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); + setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); + setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); + setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); + setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); + setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); + setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); + setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); + setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); + setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); + setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); + setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); + setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); + setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); + setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); + setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); + setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); +} + + +// +// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much +// cleaner way to do all of this but it will have to wait until the traditional +// gcc mechanism is completed. +// +// For Pic, in order for Mips16 code to call Mips32 code which according the abi +// have either arguments or returned values placed in floating point registers, +// we use a set of helper functions. (This includes functions which return type +// complex which on Mips are returned in a pair of floating point registers). +// +// This is an encoding that we inherited from gcc. +// In Mips traditional O32, N32 ABI, floating point numbers are passed in +// floating point argument registers 1,2 only when the first and optionally +// the second arguments are float (sf) or double (df). +// For Mips16 we are only concerned with the situations where floating point +// arguments are being passed in floating point registers by the ABI, because +// Mips16 mode code cannot execute floating point instructions to load those +// values and hence helper functions are needed. +// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) +// the helper function suffixs for these are: +// 0, 1, 5, 9, 2, 6, 10 +// this suffix can then be calculated as follows: +// for a given argument Arg: +// Arg1x, Arg2x = 1 : Arg is sf +// 2 : Arg is df +// 0: Arg is neither sf or df +// So this stub is the string for number Arg1x + Arg2x*4. +// However not all numbers between 0 and 10 are possible, we check anyway and +// assert if the impossible exists. +// + +unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber + (ArgListTy &Args) const { + unsigned int resultNum = 0; + if (Args.size() >= 1) { + Type *t = Args[0].Ty; + if (t->isFloatTy()) { + resultNum = 1; + } + else if (t->isDoubleTy()) { + resultNum = 2; + } + } + if (resultNum) { + if (Args.size() >=2) { + Type *t = Args[1].Ty; + if (t->isFloatTy()) { + resultNum += 4; + } + else if (t->isDoubleTy()) { + resultNum += 8; + } + } + } + return resultNum; +} + +// +// prefixs are attached to stub numbers depending on the return type . +// return type: float sf_ +// double df_ +// single complex sc_ +// double complext dc_ +// others NO PREFIX +// +// +// The full name of a helper function is__mips16_call_stub + +// return type dependent prefix + stub number +// +// +// This is something that probably should be in a different source file and +// perhaps done differently but my main purpose is to not waste runtime +// on something that we can enumerate in the source. Another possibility is +// to have a python script to generate these mapping tables. This will do +// for now. There are a whole series of helper function mapping arrays, one +// for each return type class as outlined above. There there are 11 possible +// entries. Ones with 0 are ones which should never be selected +// +// All the arrays are similar except for ones which return neither +// sf, df, sc, dc, in which only care about ones which have sf or df as a +// first parameter. +// +#define P_ "__mips16_call_stub_" +#define MAX_STUB_NUMBER 10 +#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" +#define T P "0" , T1 +#define P P_ +static char const * vMips16Helper[MAX_STUB_NUMBER+1] = + {0, T1 }; +#undef P +#define P P_ "sf_" +static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "df_" +static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "sc_" +static char const * scMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#define P P_ "dc_" +static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = + { T }; +#undef P +#undef P_ + + +const char* Mips16TargetLowering:: + getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const { + const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); +#ifndef NDEBUG + const unsigned int maxStubNum = 10; + assert(stubNum <= maxStubNum); + const bool validStubNum[maxStubNum+1] = + {true, true, true, false, false, true, true, false, false, true, true}; + assert(validStubNum[stubNum]); +#endif + const char *result; + if (RetTy->isFloatTy()) { + result = sfMips16Helper[stubNum]; + } + else if (RetTy ->isDoubleTy()) { + result = dfMips16Helper[stubNum]; + } + else if (RetTy->isStructTy()) { + // check if it's complex + if (RetTy->getNumContainedTypes() == 2) { + if ((RetTy->getContainedType(0)->isFloatTy()) && + (RetTy->getContainedType(1)->isFloatTy())) { + result = scMips16Helper[stubNum]; + } + else if ((RetTy->getContainedType(0)->isDoubleTy()) && + (RetTy->getContainedType(1)->isDoubleTy())) { + result = dcMips16Helper[stubNum]; + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + llvm_unreachable("Uncovered condition"); + } + } + else { + if (stubNum == 0) { + needHelper = false; + return ""; + } + result = vMips16Helper[stubNum]; + } + needHelper = true; + return result; +} + +void Mips16TargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + SelectionDAG &DAG = CLI.DAG; + const char* Mips16HelperFunction = 0; + bool NeedMips16Helper = false; + + if (getTargetMachine().Options.UseSoftFloat && Mips16HardFloat) { + // + // currently we don't have symbols tagged with the mips16 or mips32 + // qualifier so we will assume that we don't know what kind it is. + // and generate the helper + // + bool LookupHelper = true; + if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) { + if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { + LookupHelper = false; + } + } + if (LookupHelper) Mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); + + } + + SDValue JumpTarget = Callee; + + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned V0Reg = Mips::V0; + if (NeedMips16Helper) { + RegsToPass.push_front(std::make_pair(V0Reg, Callee)); + JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy()); + JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); + } else + RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee)); + } + + Ops.push_back(JumpTarget); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +MachineBasicBlock *Mips16TargetLowering:: +emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) + .addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitSelT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addReg(MI->getOperand(4).getReg()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock *Mips16TargetLowering::emitSeliT16 + (unsigned Opc1, unsigned Opc2, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) + .addImm(MI->getOperand(4).getImm()); + BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + BuildMI(*BB, BB->begin(), DL, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} + +MachineBasicBlock + *Mips16TargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + unsigned regY = MI->getOperand(1).getReg(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned regX = MI->getOperand(0).getReg(); + int64_t imm = MI->getOperand(1).getImm(); + MachineBasicBlock *target = MI->getOperand(2).getMBB(); + unsigned CmpOpc; + if (isUInt<8>(imm)) + CmpOpc = CmpiOpc; + else if (isUInt<16>(imm)) + CmpOpc = CmpiXOpc; + else + llvm_unreachable("immediate field not usable"); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); + BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +static unsigned Mips16WhichOp8uOr16simm + (unsigned shortOp, unsigned longOp, int64_t Imm) { + if (isUInt<8>(Imm)) + return shortOp; + else if (isInt<16>(Imm)) + return longOp; + else + llvm_unreachable("immediate field not usable"); +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + unsigned regY = MI->getOperand(2).getReg(); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addReg(regY); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock *Mips16TargetLowering::emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const { + if (DontExpandCondPseudos16) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + unsigned CC = MI->getOperand(0).getReg(); + unsigned regX = MI->getOperand(1).getReg(); + int64_t Imm = MI->getOperand(2).getImm(); + unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(SltOpc)).addReg(regX).addImm(Imm); + BuildMI(*BB, MI, MI->getDebugLoc(), + TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + +} diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h new file mode 100644 index 0000000000..b23e2a1f37 --- /dev/null +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -0,0 +1,80 @@ +//===-- Mips16ISelLowering.h - Mips16 DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips16. +// +//===----------------------------------------------------------------------===// + +#ifndef Mips16ISELLOWERING_H +#define Mips16ISELLOWERING_H + +#include "MipsISelLowering.h" + +namespace llvm { + class Mips16TargetLowering : public MipsTargetLowering { + public: + explicit Mips16TargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + void setMips16LibcallName(RTLIB::Libcall, const char *Name); + + void setMips16HardFloatLibCalls(); + + unsigned int + getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + + const char *getMips16HelperFunction + (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, + MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_T8I8I16_ins( + unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRX16_ins( + unsigned SltOpc, + MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *emitFEXT_CCRXI16_ins( + unsigned SltiOpc, unsigned SltiXOpc, + MachineInstr *MI, MachineBasicBlock *BB )const; + }; +} + +#endif // Mips16ISELLOWERING_H diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index fd3cc8f190..17dd2c0796 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -98,10 +98,10 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void Mips16InstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, + int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); @@ -110,14 +110,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::SwRxSpImmX16; assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } void Mips16InstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); @@ -126,7 +125,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Mips::CPU16RegsRegClass.hasSubClassEq(RC)) Opc = Mips::LwRxSpImmX16; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) .addMemOperand(MMO); } diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index 1cb1dfe196..a77a9043bb 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -48,17 +48,19 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + virtual void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; + + virtual void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index a9e9c52716..aa51aaf465 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -15,7 +15,7 @@ // Mips Address // def addr16 : - ComplexPattern<iPTR, 3, "SelectAddr16", [frameindex], [SDNPWantParent]>; + ComplexPattern<iPTR, 3, "selectAddr16", [frameindex], [SDNPWantParent]>; // // Address operand @@ -1466,14 +1466,14 @@ def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>; // MipsDivRem // def: Mips16Pat - <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry), + <(MipsDivRem16 CPU16Regs:$rx, CPU16Regs:$ry), (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>; // // MipsDivRemU // def: Mips16Pat - <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry), + <(MipsDivRemU16 CPU16Regs:$rx, CPU16Regs:$ry), (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>; // signed a,b diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 0ea9368949..6cca227685 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -72,6 +72,12 @@ bool Mips16RegisterInfo::saveScavengerRegister return true; } +const TargetRegisterClass * +Mips16RegisterInfo::intRegClass(unsigned Size) const { + assert(Size == 4); + return &Mips::CPU16RegsRegClass; +} + void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index b8f818a478..2b3d2b1a4e 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -37,6 +37,8 @@ public: const TargetRegisterClass *RC, unsigned Reg) const; + virtual const TargetRegisterClass *intRegClass(unsigned Size) const; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 5903b9e623..846a8224af 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -66,6 +66,14 @@ let usesCustomInserter = 1, Predicates = [HasStdEnc], defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64>; } +/// Pseudo instructions for loading, storing and copying accumulator registers. +let isPseudo = 1 in { + defm LOAD_AC128 : LoadM<"load_ac128", ACRegs128>; + defm STORE_AC128 : StoreM<"store_ac128", ACRegs128>; +} + +def COPY_AC128 : PseudoSE<(outs ACRegs128:$dst), (ins ACRegs128:$src), []>; + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -179,10 +187,16 @@ def DMULT : Mult<"dmult", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1c>; def DMULTu : Mult<"dmultu", IIImul, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1d>; -def DSDIV : Div<MipsDivRem, "ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, - MULT_FM<0, 0x1e>; -def DUDIV : Div<MipsDivRemU, "ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, - MULT_FM<0, 0x1f>; +def PseudoDMULT : MultDivPseudo<DMULT, ACRegs128, CPU64RegsOpnd, MipsMult, + IIImul>; +def PseudoDMULTu : MultDivPseudo<DMULTu, ACRegs128, CPU64RegsOpnd, MipsMultu, + IIImul>; +def DSDIV : Div<"ddiv", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1e>; +def DUDIV : Div<"ddivu", IIIdiv, CPU64RegsOpnd, [HI64, LO64]>, MULT_FM<0, 0x1f>; +def PseudoDSDIV : MultDivPseudo<DSDIV, ACRegs128, CPU64RegsOpnd, MipsDivRem, + IIIdiv, 0>; +def PseudoDUDIV : MultDivPseudo<DUDIV, ACRegs128, CPU64RegsOpnd, MipsDivRemU, + IIIdiv, 0>; def MTHI64 : MoveToLOHI<"mthi", CPU64Regs, [HI64]>, MTLO_FM<0x11>; def MTLO64 : MoveToLOHI<"mtlo", CPU64Regs, [LO64]>, MTLO_FM<0x13>; @@ -306,6 +320,10 @@ def : MipsPat<(i64 (sext_inreg CPU64Regs:$src, i32)), // bswap MipsPattern def : MipsPat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>; +// mflo/hi patterns. +def : MipsPat<(i64 (ExtractLOHI ACRegs128:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACRegs128:$ac, imm:$lohi_idx)>; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// @@ -332,13 +350,19 @@ def : InstAlias<"not $rt, $rs", def : InstAlias<"j $rs", (JR64 CPU64Regs:$rs), 0>, Requires<[HasMips64]>; def : InstAlias<"jalr $rs", (JALR64 RA_64, CPU64Regs:$rs)>, Requires<[HasMips64]>; +def : InstAlias<"jal $rs", (JALR64 RA_64, CPU64Regs:$rs), 0>, + Requires<[HasMips64]>; +def : InstAlias<"jal $rd,$rs", (JALR64 CPU64Regs:$rd, CPU64Regs:$rs), 0>, + Requires<[HasMips64]>; def : InstAlias<"daddu $rs, $rt, $imm", (DADDiu CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), 1>; def : InstAlias<"dadd $rs, $rt, $imm", (DADDi CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, simm16_64:$imm), 1>; - +def : InstAlias<"or $rs, $rt, $imm", + (ORi64 CPU64RegsOpnd:$rs, CPU64RegsOpnd:$rt, uimm16_64:$imm), + 1>, Requires<[HasMips64]>; /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64" in { diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 9531b91487..3c116e1264 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -20,17 +20,18 @@ def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>; def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>; // Mips-specific dsp nodes -def SDT_MipsExtr : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; -def SDT_MipsShilo : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def SDT_MipsDPA : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>]>; +def SDT_MipsExtr : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, + SDTCisVT<2, untyped>]>; +def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, + SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>; +def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, + SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; class MipsDSPBase<string Opc, SDTypeProfile Prof> : - SDNode<!strconcat("MipsISD::", Opc), Prof, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + SDNode<!strconcat("MipsISD::", Opc), Prof>; class MipsDSPSideEffectBase<string Opc, SDTypeProfile Prof> : - SDNode<!strconcat("MipsISD::", Opc), Prof, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPSideEffect]>; + SDNode<!strconcat("MipsISD::", Opc), Prof, [SDNPHasChain, SDNPSideEffect]>; def MipsEXTP : MipsDSPSideEffectBase<"EXTP", SDT_MipsExtr>; def MipsEXTPDP : MipsDSPSideEffectBase<"EXTPDP", SDT_MipsExtr>; @@ -40,7 +41,7 @@ def MipsEXTR_R_W : MipsDSPSideEffectBase<"EXTR_R_W", SDT_MipsExtr>; def MipsEXTR_RS_W : MipsDSPSideEffectBase<"EXTR_RS_W", SDT_MipsExtr>; def MipsSHILO : MipsDSPBase<"SHILO", SDT_MipsShilo>; -def MipsMTHLIP : MipsDSPBase<"MTHLIP", SDT_MipsShilo>; +def MipsMTHLIP : MipsDSPSideEffectBase<"MTHLIP", SDT_MipsShilo>; def MipsMULSAQ_S_W_PH : MipsDSPSideEffectBase<"MULSAQ_S_W_PH", SDT_MipsDPA>; def MipsMAQ_S_W_PHL : MipsDSPSideEffectBase<"MAQ_S_W_PHL", SDT_MipsDPA>; @@ -383,7 +384,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, InstrItinClass itin> { dag OutOperandList = (outs CPURegs:$rt); - dag InOperandList = (ins ACRegs:$ac, CPURegs:$shift_rs); + dag InOperandList = (ins ACRegsDSP:$ac, CPURegs:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; list<Register> Defs = [DSPCtrl]; @@ -392,46 +393,40 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode, class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode, InstrItinClass itin> { dag OutOperandList = (outs CPURegs:$rt); - dag InOperandList = (ins ACRegs:$ac, uimm16:$shift_rs); + dag InOperandList = (ins ACRegsDSP:$ac, uimm16:$shift_rs); string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs"); InstrItinClass Itinerary = itin; list<Register> Defs = [DSPCtrl]; } -class SHILO_R1_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins simm16:$shift), [(OpNode immSExt6:$shift)]>, - PseudoInstExpansion<(realinst AC0, simm16:$shift)> { - list<Register> Defs = [DSPCtrl, AC0]; - list<Register> Uses = [AC0]; - InstrItinClass Itinerary = itin; -} - -class SHILO_R1_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins simm16:$shift); +class SHILO_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins simm16:$shift, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode immSExt6:$shift, ACRegsDSP:$acin))]; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$acin = $ac"; } -class SHILO_R2_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins CPURegs:$rs), [(OpNode CPURegs:$rs)]>, - PseudoInstExpansion<(realinst AC0, CPURegs:$rs)> { - list<Register> Defs = [DSPCtrl, AC0]; - list<Register> Uses = [AC0]; - InstrItinClass Itinerary = itin; -} - -class SHILO_R2_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins CPURegs:$rs); +class SHILO_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$acin = $ac"; } -class MTHLIP_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins CPURegs:$rs); +class MTHLIP_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, ACRegsDSP:$acin))]; + list<Register> Uses = [DSPCtrl]; + string Constraints = "$acin = $ac"; } class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, @@ -454,35 +449,37 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode, list<Register> Defs = [DSPCtrl]; } -class DPA_W_PH_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), - [(OpNode CPURegs:$rs, CPURegs:$rt)]>, - PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { - list<Register> Defs = [DSPCtrl, AC0]; - list<Register> Uses = [AC0]; - InstrItinClass Itinerary = itin; +class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin); + string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))]; + list<Register> Defs = [DSPCtrl]; + string Constraints = "$acin = $ac"; } -class DPA_W_PH_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); +class MULT_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs ACRegsDSP:$ac); dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); -} - -class MULT_PSEUDO_BASE<SDPatternOperator OpNode, InstrItinClass itin, - Instruction realinst> : - PseudoDSP<(outs), (ins CPURegs:$rs, CPURegs:$rt), - [(OpNode CPURegs:$rs, CPURegs:$rt)]>, - PseudoInstExpansion<(realinst AC0, CPURegs:$rs, CPURegs:$rt)> { - list<Register> Defs = [DSPCtrl, AC0]; + list<dag> Pattern = [(set ACRegsDSP:$ac, (OpNode CPURegs:$rs, CPURegs:$rt))]; InstrItinClass Itinerary = itin; + int AddedComplexity = 20; + bit isCommutable = 1; } -class MULT_DESC_BASE<string instr_asm> { - dag OutOperandList = (outs ACRegs:$ac); - dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt); +class MADD_DESC_BASE<string instr_asm, SDPatternOperator OpNode, + InstrItinClass itin> { + dag OutOperandList = (outs ACRegsDSP:$ac); + dag InOperandList = (ins CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $rs, $rt"); + list<dag> Pattern = [(set ACRegsDSP:$ac, + (OpNode CPURegs:$rs, CPURegs:$rt, ACRegsDSP:$acin))]; + InstrItinClass Itinerary = itin; + int AddedComplexity = 20; + string Constraints = "$acin = $ac"; } class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> : @@ -717,44 +714,40 @@ class MULQ_RS_PH_DESC : ADDU_QB_DESC_BASE<"mulq_rs.ph", int_mips_mulq_rs_ph, NoItinerary, DSPRegs, DSPRegs>, IsCommutable; -class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph">; +class MULSAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsaq_s.w.ph", + MipsMULSAQ_S_W_PH>; -class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl">; +class MAQ_S_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phl", MipsMAQ_S_W_PHL>; -class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr">; +class MAQ_S_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_s.w.phr", MipsMAQ_S_W_PHR>; -class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl">; +class MAQ_SA_W_PHL_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phl", MipsMAQ_SA_W_PHL>; -class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr">; +class MAQ_SA_W_PHR_DESC : DPA_W_PH_DESC_BASE<"maq_sa.w.phr", MipsMAQ_SA_W_PHR>; // Dot product with accumulate/subtract -class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl">; - -class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr">; - -class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl">; - -class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr">; +class DPAU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbl", MipsDPAU_H_QBL>; -class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph">; +class DPAU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpau.h.qbr", MipsDPAU_H_QBR>; -class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph">; +class DPSU_H_QBL_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbl", MipsDPSU_H_QBL>; -class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w">; +class DPSU_H_QBR_DESC : DPA_W_PH_DESC_BASE<"dpsu.h.qbr", MipsDPSU_H_QBR>; -class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w">; +class DPAQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaq_s.w.ph", MipsDPAQ_S_W_PH>; -class MULT_DSP_DESC : MULT_DESC_BASE<"mult">; +class DPSQ_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsq_s.w.ph", MipsDPSQ_S_W_PH>; -class MULTU_DSP_DESC : MULT_DESC_BASE<"multu">; +class DPAQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpaq_sa.l.w", MipsDPAQ_SA_L_W>; -class MADD_DSP_DESC : MULT_DESC_BASE<"madd">; +class DPSQ_SA_L_W_DESC : DPA_W_PH_DESC_BASE<"dpsq_sa.l.w", MipsDPSQ_SA_L_W>; -class MADDU_DSP_DESC : MULT_DESC_BASE<"maddu">; - -class MSUB_DSP_DESC : MULT_DESC_BASE<"msub">; - -class MSUBU_DSP_DESC : MULT_DESC_BASE<"msubu">; +class MULT_DSP_DESC : MULT_DESC_BASE<"mult", MipsMult, NoItinerary>; +class MULTU_DSP_DESC : MULT_DESC_BASE<"multu", MipsMultu, NoItinerary>; +class MADD_DSP_DESC : MADD_DESC_BASE<"madd", MipsMAdd, NoItinerary>; +class MADDU_DSP_DESC : MADD_DESC_BASE<"maddu", MipsMAddu, NoItinerary>; +class MSUB_DSP_DESC : MADD_DESC_BASE<"msub", MipsMSub, NoItinerary>; +class MSUBU_DSP_DESC : MADD_DESC_BASE<"msubu", MipsMSubu, NoItinerary>; // Comparison class CMPU_EQ_QB_DESC : CMP_EQ_QB_R2_DESC_BASE<"cmpu.eq.qb", @@ -867,11 +860,11 @@ class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H, class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>; -class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo">; +class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>; -class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov">; +class SHILOV_DESC : SHILO_R2_DESC_BASE<"shilov", MipsSHILO>; -class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip">; +class MTHLIP_DESC : MTHLIP_DESC_BASE<"mthlip", MipsMTHLIP>; class RDDSP_DESC : RDDSP_DESC_BASE<"rddsp", int_mips_rddsp, NoItinerary>; @@ -975,23 +968,25 @@ class MULQ_S_PH_DESC : ADDU_QB_DESC_BASE<"mulq_s.ph", int_mips_mulq_s_ph, IsCommutable; // Dot product with accumulate/subtract -class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph">; +class DPA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpa.w.ph", MipsDPA_W_PH>; -class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph">; +class DPS_W_PH_DESC : DPA_W_PH_DESC_BASE<"dps.w.ph", MipsDPS_W_PH>; -class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph">; +class DPAQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_s.w.ph", MipsDPAQX_S_W_PH>; -class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph">; +class DPAQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpaqx_sa.w.ph", + MipsDPAQX_SA_W_PH>; -class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph">; +class DPAX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpax.w.ph", MipsDPAX_W_PH>; -class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph">; +class DPSX_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsx.w.ph", MipsDPSX_W_PH>; -class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph">; +class DPSQX_S_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_s.w.ph", MipsDPSQX_S_W_PH>; -class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph">; +class DPSQX_SA_W_PH_DESC : DPA_W_PH_DESC_BASE<"dpsqx_sa.w.ph", + MipsDPSQX_SA_W_PH>; -class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph">; +class MULSA_W_PH_DESC : DPA_W_PH_DESC_BASE<"mulsa.w.ph", MipsMULSA_W_PH>; // Precision reduce/expand class PRECR_QB_PH_DESC : CMP_EQ_QB_R3_DESC_BASE<"precr.qb.ph", @@ -1206,71 +1201,14 @@ def PREPEND : PREPEND_ENC, PREPEND_DESC; } // Pseudos. -def MULSAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSAQ_S_W_PH, NoItinerary, - MULSAQ_S_W_PH>; -def MAQ_S_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHL, NoItinerary, - MAQ_S_W_PHL>; -def MAQ_S_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_S_W_PHR, NoItinerary, - MAQ_S_W_PHR>; -def MAQ_SA_W_PHL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHL, NoItinerary, - MAQ_SA_W_PHL>; -def MAQ_SA_W_PHR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMAQ_SA_W_PHR, NoItinerary, - MAQ_SA_W_PHR>; -def DPAU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBL, NoItinerary, - DPAU_H_QBL>; -def DPAU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAU_H_QBR, NoItinerary, - DPAU_H_QBR>; -def DPSU_H_QBL_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBL, NoItinerary, - DPSU_H_QBL>; -def DPSU_H_QBR_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSU_H_QBR, NoItinerary, - DPSU_H_QBR>; -def DPAQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_S_W_PH, NoItinerary, - DPAQ_S_W_PH>; -def DPSQ_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_S_W_PH, NoItinerary, - DPSQ_S_W_PH>; -def DPAQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQ_SA_L_W, NoItinerary, - DPAQ_SA_L_W>; -def DPSQ_SA_L_W_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQ_SA_L_W, NoItinerary, - DPSQ_SA_L_W>; - -def MULT_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULT, NoItinerary, MULT_DSP>, - IsCommutable; -def MULTU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMULTU, NoItinerary, MULTU_DSP>, - IsCommutable; -def MADD_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADD_DSP, NoItinerary, MADD_DSP>, - IsCommutable, UseAC; -def MADDU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMADDU_DSP, NoItinerary, MADDU_DSP>, - IsCommutable, UseAC; -def MSUB_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUB_DSP, NoItinerary, MSUB_DSP>, - UseAC; -def MSUBU_DSP_PSEUDO : MULT_PSEUDO_BASE<MipsMSUBU_DSP, NoItinerary, MSUBU_DSP>, - UseAC; - -def SHILO_PSEUDO : SHILO_R1_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILO>; -def SHILOV_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsSHILO, NoItinerary, SHILOV>; -def MTHLIP_PSEUDO : SHILO_R2_PSEUDO_BASE<MipsMTHLIP, NoItinerary, MTHLIP>; - -let Predicates = [HasDSPR2] in { - -def DPA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPA_W_PH, NoItinerary, DPA_W_PH>; -def DPS_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPS_W_PH, NoItinerary, DPS_W_PH>; -def DPAQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_S_W_PH, NoItinerary, - DPAQX_S_W_PH>; -def DPAQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAQX_SA_W_PH, NoItinerary, - DPAQX_SA_W_PH>; -def DPAX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPAX_W_PH, NoItinerary, - DPAX_W_PH>; -def DPSX_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSX_W_PH, NoItinerary, - DPSX_W_PH>; -def DPSQX_S_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_S_W_PH, NoItinerary, - DPSQX_S_W_PH>; -def DPSQX_SA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsDPSQX_SA_W_PH, NoItinerary, - DPSQX_SA_W_PH>; -def MULSA_W_PH_PSEUDO : DPA_W_PH_PSEUDO_BASE<MipsMULSA_W_PH, NoItinerary, - MULSA_W_PH>; - +/// Pseudo instructions for loading, storing and copying accumulator registers. +let isPseudo = 1 in { + defm LOAD_AC_DSP : LoadM<"load_ac_dsp", ACRegsDSP>; + defm STORE_AC_DSP : StoreM<"store_ac_dsp", ACRegsDSP>; } +def COPY_AC_DSP : PseudoSE<(outs ACRegsDSP:$dst), (ins ACRegsDSP:$src), []>; + // Patterns. class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : Pat<pattern, result>, Requires<[pred]>; @@ -1296,10 +1234,12 @@ def : DSPPat<(store (v4i8 DSPRegs:$val), addr:$a), // Extr patterns. class EXTR_W_TY1_R2_Pat<SDPatternOperator OpNode, Instruction Instr> : - DSPPat<(i32 (OpNode CPURegs:$rs)), (Instr AC0, CPURegs:$rs)>; + DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)), + (Instr ACRegsDSP:$ac, CPURegs:$rs)>; class EXTR_W_TY1_R1_Pat<SDPatternOperator OpNode, Instruction Instr> : - DSPPat<(i32 (OpNode immZExt5:$shift)), (Instr AC0, immZExt5:$shift)>; + DSPPat<(i32 (OpNode immZExt5:$shift, ACRegsDSP:$ac)), + (Instr ACRegsDSP:$ac, immZExt5:$shift)>; def : EXTR_W_TY1_R1_Pat<MipsEXTP, EXTP>; def : EXTR_W_TY1_R2_Pat<MipsEXTP, EXTPV>; @@ -1313,3 +1253,19 @@ def : EXTR_W_TY1_R1_Pat<MipsEXTR_RS_W, EXTR_RS_W>; def : EXTR_W_TY1_R2_Pat<MipsEXTR_RS_W, EXTRV_RS_W>; def : EXTR_W_TY1_R1_Pat<MipsEXTR_S_H, EXTR_S_H>; def : EXTR_W_TY1_R2_Pat<MipsEXTR_S_H, EXTRV_S_H>; + +// mflo/hi patterns. +let AddedComplexity = 20 in +def : DSPPat<(i32 (ExtractLOHI ACRegsDSP:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACRegsDSP:$ac, imm:$lohi_idx)>; + +// Indexed load patterns. +class IndexedLoadPat<SDPatternOperator LoadNode, Instruction Instr> : + DSPPat<(i32 (LoadNode (add i32:$base, i32:$index))), + (Instr i32:$base, i32:$index)>; + +let AddedComplexity = 20 in { + def : IndexedLoadPat<zextloadi8, LBUX>; + def : IndexedLoadPat<sextloadi16, LHX>; + def : IndexedLoadPat<load, LWX>; +} diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index e265590141..d07a595af3 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -220,9 +220,9 @@ namespace { /// that can be moved to the delay slot. Returns true on success. bool searchForward(MachineBasicBlock &MBB, Iter Slot) const; - /// This function searches MBB's successor blocks for an instruction that - /// can be moved to the delay slot and inserts clones of the instruction - /// into the successor blocks. + /// This function searches one of MBB's successor blocks for an instruction + /// that can be moved to the delay slot and inserts clones of the + /// instruction into the successor's predecessor blocks. bool searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const; /// Pick a successor block of MBB. Return NULL if MBB doesn't have a diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 14268d2130..6a5f79d0df 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -26,9 +26,8 @@ protected: const MipsSubtarget &STI; public: - explicit MipsFrameLowering(const MipsSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.hasMips64() ? 16 : 8, 0, - sti.hasMips64() ? 16 : 8), STI(sti) {} + explicit MipsFrameLowering(const MipsSubtarget &sti, unsigned Alignment) + : TargetFrameLowering(StackGrowsDown, Alignment, 0, Alignment), STI(sti) {} static const MipsFrameLowering *create(MipsTargetMachine &TM, const MipsSubtarget &ST); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 6dff548505..77b08cb11e 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -12,19 +12,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-isel" +#include "MipsISelDAGToDAG.h" +#include "Mips16ISelDAGToDAG.h" +#include "MipsSEISelDAGToDAG.h" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" -#include "MipsSubtarget.h" -#include "MipsTargetMachine.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -45,270 +45,11 @@ using namespace llvm; // MipsDAGToDAGISel - MIPS specific code to select MIPS machine // instructions for SelectionDAG operations. //===----------------------------------------------------------------------===// -namespace { - -class MipsDAGToDAGISel : public SelectionDAGISel { - - /// TM - Keep a reference to MipsTargetMachine. - MipsTargetMachine &TM; - - /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can - /// make the right decision when generating code for different targets. - const MipsSubtarget &Subtarget; - -public: - explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : - SelectionDAGISel(tm), - TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {} - - // Pass Name - virtual const char *getPassName() const { - return "MIPS DAG->DAG Pattern Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - -private: - // Include the pieces autogenerated from the target description. - #include "MipsGenDAGISel.inc" - - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. - const MipsTargetMachine &getTargetMachine() { - return static_cast<const MipsTargetMachine &>(TM); - } - - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. - const MipsInstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); - } - - SDNode *getGlobalBaseReg(); - - SDValue getMips16SPAliasReg(); - - void getMips16SPRefReg(SDNode *parent, SDValue &AliasReg); - - std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, - EVT Ty, bool HasLo, bool HasHi); - - SDNode *Select(SDNode *N); - - // Complex Pattern. - /// (reg + imm). - bool selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Fall back on this function if all else fails. - bool selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - /// Match integer address pattern. - bool selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const; - - bool SelectAddr16(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Alias); - - // getImm - Return a target constant with the specified value. - inline SDValue getImm(const SDNode *Node, uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); - } - - void ProcessFunctionAfterISel(MachineFunction &MF); - bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); - void InitGlobalBaseReg(MachineFunction &MF); - void InitMips16SPAliasReg(MachineFunction &MF); - - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); -}; - -} - -// Insert instructions to initialize the global base register in the -// first MBB of the function. When the ABI is O32 and the relocation model is -// PIC, the necessary instructions are emitted later to prevent optimization -// passes from moving them. -void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->globalBaseRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); - const TargetRegisterClass *RC; - - if (Subtarget.isABI_N64()) - RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; - else if (Subtarget.inMips16Mode()) - RC = (const TargetRegisterClass*)&Mips::CPU16RegsRegClass; - else - RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; - - V0 = RegInfo.createVirtualRegister(RC); - V1 = RegInfo.createVirtualRegister(RC); - V2 = RegInfo.createVirtualRegister(RC); - - if (Subtarget.isABI_N64()) { - MF.getRegInfo().addLiveIn(Mips::T9_64); - MBB.addLiveIn(Mips::T9_64); - - // lui $v0, %hi(%neg(%gp_rel(fname))) - // daddu $v1, $v0, $t9 - // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) - .addReg(Mips::T9_64); - BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - if (Subtarget.inMips16Mode()) { - BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1) - .addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO); - BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16); - BuildMI(MBB, I, DL, TII.get(Mips::AdduRxRyRz16), GlobalBaseReg) - .addReg(V1).addReg(V2); - return; - } - - if (MF.getTarget().getRelocationModel() == Reloc::Static) { - // Set global register to __gnu_local_gp. - // - // lui $v0, %hi(__gnu_local_gp) - // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) - .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); - return; - } - - MF.getRegInfo().addLiveIn(Mips::T9); - MBB.addLiveIn(Mips::T9); - - if (Subtarget.isABI_N32()) { - // lui $v0, %hi(%neg(%gp_rel(fname))) - // addu $v1, $v0, $t9 - // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) - const GlobalValue *FName = MF.getFunction(); - BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); - BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) - .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); - return; - } - - assert(Subtarget.isABI_O32()); - - // For O32 ABI, the following instruction sequence is emitted to initialize - // the global base register: - // - // 0. lui $2, %hi(_gp_disp) - // 1. addiu $2, $2, %lo(_gp_disp) - // 2. addu $globalbasereg, $2, $t9 - // - // We emit only the last instruction here. - // - // GNU linker requires that the first two instructions appear at the beginning - // of a function and no instructions be inserted before or between them. - // The two instructions are emitted during lowering to MC layer in order to - // avoid any reordering. - // - // Register $2 (Mips::V0) is added to the list of live-in registers to ensure - // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) - // reads it. - MF.getRegInfo().addLiveIn(Mips::V0); - MBB.addLiveIn(Mips::V0); - BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) - .addReg(Mips::V0).addReg(Mips::T9); -} - -// Insert instructions to initialize the Mips16 SP Alias register in the -// first MBB of the function. -// -void MipsDAGToDAGISel::InitMips16SPAliasReg(MachineFunction &MF) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); - - if (!MipsFI->mips16SPAliasRegSet()) - return; - - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator I = MBB.begin(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); - unsigned Mips16SPAliasReg = MipsFI->getMips16SPAliasReg(); - - BuildMI(MBB, I, DL, TII.get(Mips::MoveR3216), Mips16SPAliasReg) - .addReg(Mips::SP); -} - - -bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, - const MachineInstr& MI) { - unsigned DstReg = 0, ZeroReg = 0; - - // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". - if ((MI.getOpcode() == Mips::ADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO; - } else if ((MI.getOpcode() == Mips::DADDiu) && - (MI.getOperand(1).getReg() == Mips::ZERO_64) && - (MI.getOperand(2).getImm() == 0)) { - DstReg = MI.getOperand(0).getReg(); - ZeroReg = Mips::ZERO_64; - } - - if (!DstReg) - return false; - - // Replace uses with ZeroReg. - for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), - E = MRI->use_end(); U != E;) { - MachineOperand &MO = U.getOperand(); - unsigned OpNo = U.getOperandNo(); - MachineInstr *MI = MO.getParent(); - ++U; - - // Do not replace if it is a phi's operand or is tied to def operand. - if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) - continue; - - MO.setReg(ZeroReg); - } - - return true; -} - -void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) { - InitGlobalBaseReg(MF); - InitMips16SPAliasReg(MF); - - MachineRegisterInfo *MRI = &MF.getRegInfo(); - - for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; - ++MFI) - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - ReplaceUsesWithZeroReg(MRI, *I); -} bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { bool Ret = SelectionDAGISel::runOnMachineFunction(MF); - ProcessFunctionAfterISel(MF); + processFunctionAfterISel(MF); return Ret; } @@ -320,233 +61,36 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() { return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } -/// getMips16SPAliasReg - Output the instructions required to put the -/// SP into a Mips16 accessible aliased register. -SDValue MipsDAGToDAGISel::getMips16SPAliasReg() { - unsigned Mips16SPAliasReg = - MF->getInfo<MipsFunctionInfo>()->getMips16SPAliasReg(); - return CurDAG->getRegister(Mips16SPAliasReg, TLI.getPointerTy()); -} - /// ComplexPattern used on MipsInstrInfo /// Used on Mips Load/Store instructions bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) const { - EVT ValTy = Addr.getValueType(); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; - } - - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - } - + llvm_unreachable("Unimplemented function."); return false; } bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, SDValue &Offset) const { - Base = Addr; - Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); - return true; + llvm_unreachable("Unimplemented function."); + return false; } bool MipsDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, SDValue &Offset) const { - return selectAddrRegImm(Addr, Base, Offset) || - selectAddrDefault(Addr, Base, Offset); -} - -void MipsDAGToDAGISel::getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg) { - SDValue AliasFPReg = CurDAG->getRegister(Mips::S0, TLI.getPointerTy()); - if (Parent) { - switch (Parent->getOpcode()) { - case ISD::LOAD: { - LoadSDNode *SD = dyn_cast<LoadSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - case ISD::STORE: { - StoreSDNode *SD = dyn_cast<StoreSDNode>(Parent); - switch (SD->getMemoryVT().getSizeInBits()) { - case 8: - case 16: - AliasReg = TM.getFrameLowering()->hasFP(*MF)? - AliasFPReg: getMips16SPAliasReg(); - return; - } - break; - } - } - } - AliasReg = CurDAG->getRegister(Mips::SP, TLI.getPointerTy()); - return; - -} -bool MipsDAGToDAGISel::SelectAddr16( - SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset, - SDValue &Alias) { - EVT ValTy = Addr.getValueType(); - - Alias = CurDAG->getTargetConstant(0, ValTy); - - // if Address is FI, get the TargetFrameIndex. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - Offset = CurDAG->getTargetConstant(0, ValTy); - getMips16SPRefReg(Parent, Alias); - return true; - } - // on PIC code Load GA - if (Addr.getOpcode() == MipsISD::Wrapper) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; - } - if (TM.getRelocationModel() != Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetExternalSymbol || - Addr.getOpcode() == ISD::TargetGlobalAddress)) - return false; - } - // Addresses of the form FI+const or FI|const - if (CurDAG->isBaseWithConstantOffset(Addr)) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); - if (isInt<16>(CN->getSExtValue())) { - - // If the first operand is a FI, get the TargetFI Node - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> - (Addr.getOperand(0))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); - getMips16SPRefReg(Parent, Alias); - } - else - Base = Addr.getOperand(0); - - Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); - return true; - } - } - // Operand is a result from an ADD. - if (Addr.getOpcode() == ISD::ADD) { - // When loading from constant pools, load the lower address part in - // the instruction itself. Example, instead of: - // lui $2, %hi($CPI1_0) - // addiu $2, $2, %lo($CPI1_0) - // lwc1 $f0, 0($2) - // Generate: - // lui $2, %hi($CPI1_0) - // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || - Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { - SDValue Opnd0 = Addr.getOperand(1).getOperand(0); - if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || - isa<JumpTableSDNode>(Opnd0)) { - Base = Addr.getOperand(0); - Offset = Opnd0; - return true; - } - } - - // If an indexed floating point load/store can be emitted, return false. - const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); - - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; - } - Base = Addr; - Offset = CurDAG->getTargetConstant(0, ValTy); - return true; + llvm_unreachable("Unimplemented function."); + return false; } -/// Select multiply instructions. -std::pair<SDNode*, SDNode*> -MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty, - bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; - SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0), - N->getOperand(1)); - SDValue InFlag = SDValue(Mul, 0); - - if (HasLo) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mflo16 : - (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); - Lo = CurDAG->getMachineNode(Opcode, dl, Ty, MVT::Glue, InFlag); - InFlag = SDValue(Lo, 1); - } - if (HasHi) { - unsigned Opcode = Subtarget.inMips16Mode() ? Mips::Mfhi16 : - (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); - Hi = CurDAG->getMachineNode(Opcode, dl, Ty, InFlag); - } - return std::make_pair(Lo, Hi); +bool MipsDAGToDAGISel::selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias) { + llvm_unreachable("Unimplemented function."); + return false; } - /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); - DebugLoc dl = Node->getDebugLoc(); // Dump information about the Node being selected DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); @@ -557,167 +101,19 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return NULL; } - /// - // Instruction Selection not handled by the auto-generated - // tablegen selection should be handled here. - /// - EVT NodeTy = Node->getValueType(0); - unsigned MultOpc; + // See if subclasses can handle this node. + std::pair<bool, SDNode*> Ret = selectNode(Node); + + if (Ret.first) + return Ret.second; switch(Opcode) { default: break; - case ISD::SUBE: - case ISD::ADDE: { - bool inMips16Mode = Subtarget.inMips16Mode(); - SDValue InFlag = Node->getOperand(2), CmpLHS; - unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && - "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); - - unsigned MOp; - if (Opcode == ISD::ADDE) { - CmpLHS = InFlag.getValue(0); - if (inMips16Mode) - MOp = Mips::AdduRxRyRz16; - else - MOp = Mips::ADDu; - } else { - CmpLHS = InFlag.getOperand(0); - if (inMips16Mode) - MOp = Mips::SubuRxRyRz16; - else - MOp = Mips::SUBu; - } - - SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; - - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); - - EVT VT = LHS.getValueType(); - - unsigned Sltu_op = inMips16Mode? Mips::SltuRxRyRz16: Mips::SLTu; - SDNode *Carry = CurDAG->getMachineNode(Sltu_op, dl, VT, Ops, 2); - unsigned Addu_op = inMips16Mode? Mips::AdduRxRyRz16 : Mips::ADDu; - SDNode *AddCarry = CurDAG->getMachineNode(Addu_op, dl, VT, - SDValue(Carry,0), RHS); - - return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, - LHS, SDValue(AddCarry,0)); - } - - /// Mul with two results - case ISD::SMUL_LOHI: - case ISD::UMUL_LOHI: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MultuRxRy16 : - Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); - - std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy, - true, true); - - if (!SDValue(Node, 0).use_empty()) - ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); - - if (!SDValue(Node, 1).use_empty()) - ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - - return NULL; - } - - /// Special Muls - case ISD::MUL: { - // Mips32 has a 32-bit three operand mul instruction. - if (Subtarget.hasMips32() && NodeTy == MVT::i32) - break; - return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT, - dl, NodeTy, true, false).first; - } - case ISD::MULHS: - case ISD::MULHU: { - if (NodeTy == MVT::i32) { - if (Subtarget.inMips16Mode()) - MultOpc = (Opcode == ISD::MULHU ? - Mips::MultuRxRy16 : Mips::MultRxRy16); - else - MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - } - else - MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); - - return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second; - } - // Get target GOT address. case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); - case ISD::ConstantFP: { - ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.hasMips64()) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO_64, MVT::i64); - return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero); - } - - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Mips::ZERO, MVT::i32); - return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero, - Zero); - } - break; - } - - case ISD::Constant: { - const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); - unsigned Size = CN->getValueSizeInBits(0); - - if (Size == 32) - break; - - MipsAnalyzeImmediate AnalyzeImm; - int64_t Imm = CN->getSExtValue(); - - const MipsAnalyzeImmediate::InstSeq &Seq = - AnalyzeImm.Analyze(Imm, Size, false); - - MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); - DebugLoc DL = CN->getDebugLoc(); - SDNode *RegOpnd; - SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - - // The first instruction can be a LUi which is different from other - // instructions (ADDiu, ORI and SLL) in that it does not have a register - // operand. - if (Inst->Opc == Mips::LUi64) - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); - else - RegOpnd = - CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - CurDAG->getRegister(Mips::ZERO_64, MVT::i64), - ImmOpnd); - - // The remaining instructions in the sequence are handled here. - for (++Inst; Inst != Seq.end(); ++Inst) { - ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), - MVT::i64); - RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, - SDValue(RegOpnd, 0), ImmOpnd); - } - - return RegOpnd; - } - #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: @@ -726,31 +122,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { "Unexpected unaligned loads/stores."); break; #endif - - case MipsISD::ThreadPointer: { - EVT PtrVT = TLI.getPointerTy(); - unsigned RdhwrOpc, SrcReg, DestReg; - - if (PtrVT == MVT::i32) { - RdhwrOpc = Mips::RDHWR; - SrcReg = Mips::HWR29; - DestReg = Mips::V1; - } else { - RdhwrOpc = Mips::RDHWR64; - SrcReg = Mips::HWR29_64; - DestReg = Mips::V1_64; - } - - SDNode *Rdhwr = - CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), - Node->getValueType(0), - CurDAG->getRegister(SrcReg, PtrVT)); - SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg, - SDValue(Rdhwr, 0)); - SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT); - ReplaceUses(SDValue(Node, 0), ResNode); - return ResNode.getNode(); - } } // Select the default instruction @@ -776,5 +147,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, /// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { - return new MipsDAGToDAGISel(TM); + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16ISelDag(TM); + + return llvm::createMipsSEISelDag(TM); } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h new file mode 100644 index 0000000000..cf0f9c58aa --- /dev/null +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -0,0 +1,93 @@ +//===---- MipsISelDAGToDAG.h - A Dag to Dag Inst Selector for Mips --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the MIPS target. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSISELDAGTODAG_H +#define MIPSISELDAGTODAG_H + +#include "Mips.h" +#include "MipsSubtarget.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/SelectionDAGISel.h" + +//===----------------------------------------------------------------------===// +// Instruction Selector Implementation +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MipsDAGToDAGISel - MIPS specific code to select MIPS machine +// instructions for SelectionDAG operations. +//===----------------------------------------------------------------------===// +namespace llvm { + +class MipsDAGToDAGISel : public SelectionDAGISel { +public: + explicit MipsDAGToDAGISel(MipsTargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + + // Pass Name + virtual const char *getPassName() const { + return "MIPS DAG->DAG Pattern Instruction Selection"; + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + +protected: + SDNode *getGlobalBaseReg(); + + /// Keep a pointer to the MipsSubtarget around so that we can make the right + /// decision when generating code for different targets. + const MipsSubtarget &Subtarget; + +private: + // Include the pieces autogenerated from the target description. + #include "MipsGenDAGISel.inc" + + // Complex Pattern. + /// (reg + imm). + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Fall back on this function if all else fails. + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + /// Match integer address pattern. + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias); + + virtual SDNode *Select(SDNode *N); + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0; + + // getImm - Return a target constant with the specified value. + inline SDValue getImm(const SDNode *Node, uint64_t Imm) { + return CurDAG->getTargetConstant(Imm, Node->getValueType(0)); + } + + virtual void processFunctionAfterISel(MachineFunction &MF) = 0; + + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps); +}; + +/// createMipsISelDag - This pass converts a legalized DAG into a +/// MIPS-specific DAG, ready for instruction scheduling. +FunctionPass *createMipsISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c452dee8d0..e2219f257e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" -#include <set> #include "MipsISelLowering.h" #include "InstPrinter/MipsInstPrinter.h" #include "MCTargetDesc/MipsBaseInfo.h" @@ -42,26 +41,9 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> -EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, - cl::desc("MIPS: Enable tail calls."), cl::init(false)); - -static cl::opt<bool> LargeGOT("mxgot", cl::Hidden, cl::desc("MIPS: Enable GOT larger than 64k."), cl::init(false)); -static cl::opt<bool> -Mips16HardFloat("mips16-hard-float", cl::NotHidden, - cl::desc("MIPS: mips16 hard float enable."), - cl::init(false)); - -static cl::opt<bool> DontExpandCondPseudos16( - "mips16-dont-expand-cond-pseudo", - cl::init(false), - cl::desc("Dont expand conditional move related " - "pseudos for Mips 16"), - cl::Hidden); - - static const uint16_t O32IntRegs[4] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; @@ -88,7 +70,7 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { return true; } -static SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) { +SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>(); return DAG.getRegister(FI->getGlobalBaseReg(), Ty); } @@ -123,7 +105,8 @@ static SDValue getAddrNonPIC(SDValue Op, SelectionDAG &DAG) { DAG.getNode(MipsISD::Lo, DL, Ty, Lo)); } -static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { +SDValue MipsTargetLowering::getAddrLocal(SDValue Op, SelectionDAG &DAG, + bool HasMips64) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; @@ -137,7 +120,8 @@ static SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) { return DAG.getNode(ISD::ADD, DL, Ty, Load, Lo); } -static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { +SDValue MipsTargetLowering::getAddrGlobal(SDValue Op, SelectionDAG &DAG, + unsigned Flag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), @@ -146,8 +130,9 @@ static SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) { MachinePointerInfo::getGOT(), false, false, false, 0); } -static SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, - unsigned HiFlag, unsigned LoFlag) { +SDValue MipsTargetLowering::getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, + unsigned LoFlag) const { DebugLoc DL = Op.getDebugLoc(); EVT Ty = Op.getValueType(); SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty, getTargetNode(Op, DAG, HiFlag)); @@ -173,12 +158,18 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F"; case MipsISD::FPRound: return "MipsISD::FPRound"; + case MipsISD::ExtractLOHI: return "MipsISD::ExtractLOHI"; + case MipsISD::InsertLOHI: return "MipsISD::InsertLOHI"; + case MipsISD::Mult: return "MipsISD::Mult"; + case MipsISD::Multu: return "MipsISD::Multu"; case MipsISD::MAdd: return "MipsISD::MAdd"; case MipsISD::MAddu: return "MipsISD::MAddu"; case MipsISD::MSub: return "MipsISD::MSub"; case MipsISD::MSubu: return "MipsISD::MSubu"; case MipsISD::DivRem: return "MipsISD::DivRem"; case MipsISD::DivRemU: return "MipsISD::DivRemU"; + case MipsISD::DivRem16: return "MipsISD::DivRem16"; + case MipsISD::DivRemU16: return "MipsISD::DivRemU16"; case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::Wrapper: return "MipsISD::Wrapper"; @@ -211,110 +202,17 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -namespace { - struct LTStr { - bool operator()(const char *S1, const char *S2) const - { - return strcmp(S1, S2) < 0; - } - }; - - std::set<const char*, LTStr> NoHelperNeeded; -} - -void MipsTargetLowering::setMips16LibcallName - (RTLIB::Libcall L, const char *Name) { - setLibcallName(L, Name); - NoHelperNeeded.insert(Name); -} - -void MipsTargetLowering::setMips16HardFloatLibCalls() { - setMips16LibcallName(RTLIB::ADD_F32, "__mips16_addsf3"); - setMips16LibcallName(RTLIB::ADD_F64, "__mips16_adddf3"); - setMips16LibcallName(RTLIB::SUB_F32, "__mips16_subsf3"); - setMips16LibcallName(RTLIB::SUB_F64, "__mips16_subdf3"); - setMips16LibcallName(RTLIB::MUL_F32, "__mips16_mulsf3"); - setMips16LibcallName(RTLIB::MUL_F64, "__mips16_muldf3"); - setMips16LibcallName(RTLIB::DIV_F32, "__mips16_divsf3"); - setMips16LibcallName(RTLIB::DIV_F64, "__mips16_divdf3"); - setMips16LibcallName(RTLIB::FPEXT_F32_F64, "__mips16_extendsfdf2"); - setMips16LibcallName(RTLIB::FPROUND_F64_F32, "__mips16_truncdfsf2"); - setMips16LibcallName(RTLIB::FPTOSINT_F32_I32, "__mips16_fix_truncsfsi"); - setMips16LibcallName(RTLIB::FPTOSINT_F64_I32, "__mips16_fix_truncdfsi"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F32, "__mips16_floatsisf"); - setMips16LibcallName(RTLIB::SINTTOFP_I32_F64, "__mips16_floatsidf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F32, "__mips16_floatunsisf"); - setMips16LibcallName(RTLIB::UINTTOFP_I32_F64, "__mips16_floatunsidf"); - setMips16LibcallName(RTLIB::OEQ_F32, "__mips16_eqsf2"); - setMips16LibcallName(RTLIB::OEQ_F64, "__mips16_eqdf2"); - setMips16LibcallName(RTLIB::UNE_F32, "__mips16_nesf2"); - setMips16LibcallName(RTLIB::UNE_F64, "__mips16_nedf2"); - setMips16LibcallName(RTLIB::OGE_F32, "__mips16_gesf2"); - setMips16LibcallName(RTLIB::OGE_F64, "__mips16_gedf2"); - setMips16LibcallName(RTLIB::OLT_F32, "__mips16_ltsf2"); - setMips16LibcallName(RTLIB::OLT_F64, "__mips16_ltdf2"); - setMips16LibcallName(RTLIB::OLE_F32, "__mips16_lesf2"); - setMips16LibcallName(RTLIB::OLE_F64, "__mips16_ledf2"); - setMips16LibcallName(RTLIB::OGT_F32, "__mips16_gtsf2"); - setMips16LibcallName(RTLIB::OGT_F64, "__mips16_gtdf2"); - setMips16LibcallName(RTLIB::UO_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::UO_F64, "__mips16_unorddf2"); - setMips16LibcallName(RTLIB::O_F32, "__mips16_unordsf2"); - setMips16LibcallName(RTLIB::O_F64, "__mips16_unorddf2"); -} - MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(&TM.getSubtarget<MipsSubtarget>()), HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()), IsO32(Subtarget->isABI_O32()) { - // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - // Set up the register classes - addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); - - if (HasMips64) - addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); - - if (Subtarget->inMips16Mode()) { - addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); - if (Mips16HardFloat) - setMips16HardFloatLibCalls(); - } - - if (Subtarget->hasDSP()) { - MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; - - for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { - addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); - - // Expand all builtin opcodes. - for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) - setOperationAction(Opc, VecTys[i], Expand); - - setOperationAction(ISD::LOAD, VecTys[i], Legal); - setOperationAction(ISD::STORE, VecTys[i], Legal); - setOperationAction(ISD::BITCAST, VecTys[i], Legal); - } - } - - if (!TM.Options.UseSoftFloat) { - addRegisterClass(MVT::f32, &Mips::FGR32RegClass); - - // When dealing with single precision only, use libcalls - if (!Subtarget->isSingleFloat()) { - if (HasMips64) - addRegisterClass(MVT::f64, &Mips::FGR64RegClass); - else - addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); - } - } - // Load extented operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); @@ -348,18 +246,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - } - else { - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - } - if (!Subtarget->inMips16Mode()) { - setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i32, Custom); - } if (!TM.Options.NoNaNsFPMath) { setOperationAction(ISD::FABS, MVT::f32, Custom); @@ -472,21 +358,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); - if (Subtarget->inMips16Mode()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); - } - setInsertFencesForAtomic(true); if (!Subtarget->hasSEInReg()) { @@ -511,8 +382,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setTruncStoreAction(MVT::i64, MVT::i32, Custom); } - setTargetDAGCombine(ISD::ADDE); - setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SELECT); @@ -523,7 +392,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setMinFunctionAlignment(HasMips64 ? 3 : 2); setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP); - computeRegisterProperties(); setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0); setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1); @@ -531,22 +399,11 @@ MipsTargetLowering(MipsTargetMachine &TM) MaxStoresPerMemcpy = 16; } -bool -MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { - MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; - - if (Subtarget->inMips16Mode()) - return false; +const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) { + if (TM.getSubtargetImpl()->inMips16Mode()) + return llvm::createMips16TargetLowering(TM); - switch (SVT) { - case MVT::i64: - case MVT::i32: - if (Fast) - *Fast = true; - return true; - default: - return false; - } + return llvm::createMipsSETargetLowering(TM); } EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { @@ -555,178 +412,6 @@ EVT MipsTargetLowering::getSetCCResultType(EVT VT) const { return VT.changeVectorElementTypeToInteger(); } -// selectMADD - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc multLo, Lo0), (adde multHi, Hi0), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order - // for the matching to be successful. - SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); - - if (ADDCNode->getOpcode() != ISD::ADDC) - return false; - - SDValue MultHi = ADDENode->getOperand(0); - SDValue MultLo = ADDCNode->getOperand(0); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MADD only if ADDENode and ADDCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDValue Chain = CurDAG->getEntryNode(); - DebugLoc DL = ADDENode->getDebugLoc(); - - // create MipsMAdd(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - - SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - ADDCNode->getOperand(1),// Lo0 - ADDENode->getOperand(1));// Hi0 - - // create CopyFromReg nodes - SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32, - MAdd); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL, - Mips::HI, MVT::i32, - CopyFromLo.getValue(2)); - - // replace uses of adde and addc here - if (!SDValue(ADDCNode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), CopyFromLo); - - if (!SDValue(ADDENode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi); - - return true; -} - -// selectMSUB - -// Transforms a subgraph in CurDAG if the following pattern is found: -// (addc Lo0, multLo), (sube Hi0, multHi), -// where, -// multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if pattern matching was successful. -static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order - // for the matching to be successful. - SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); - - if (SUBCNode->getOpcode() != ISD::SUBC) - return false; - - SDValue MultHi = SUBENode->getOperand(1); - SDValue MultLo = SUBCNode->getOperand(1); - SDNode *MultNode = MultHi.getNode(); - unsigned MultOpc = MultHi.getOpcode(); - - // MultHi and MultLo must be generated by the same node, - if (MultLo.getNode() != MultNode) - return false; - - // and it must be a multiplication. - if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) - return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. - if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) - return false; - - // Transform this to a MSUB only if SUBENode and SUBCNode are the only users - // of the values of MultNode, in which case MultNode will be removed in later - // phases. - // If there exist users other than SUBENode or SUBCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MSUB instructions being - // produced. - if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) - return false; - - SDValue Chain = CurDAG->getEntryNode(); - DebugLoc DL = SUBENode->getDebugLoc(); - - // create MipsSub(u) node - MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; - - SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, - MultNode->getOperand(0),// Factor 0 - MultNode->getOperand(1),// Factor 1 - SUBCNode->getOperand(0),// Lo0 - SUBENode->getOperand(0));// Hi0 - - // create CopyFromReg nodes - SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, DL, Mips::LO, MVT::i32, - MSub); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), DL, - Mips::HI, MVT::i32, - CopyFromLo.getValue(2)); - - // replace uses of sube and subc here - if (!SDValue(SUBCNode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), CopyFromLo); - - if (!SDValue(SUBENode, 0).use_empty()) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), CopyFromHi); - - return true; -} - -static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMADD(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - -static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const MipsSubtarget *Subtarget) { - if (DCI.isBeforeLegalize()) - return SDValue(); - - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMSUB(N, &DAG)) - return SDValue(N, 0); - - return SDValue(); -} - static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget *Subtarget) { @@ -736,8 +421,8 @@ static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, EVT Ty = N->getValueType(0); unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64; unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64; - unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : - MipsISD::DivRemU; + unsigned Opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem16 : + MipsISD::DivRemU16; DebugLoc DL = N->getDebugLoc(); SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue, @@ -791,8 +476,9 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { } -// Returns true if condition code has to be inverted. -static bool invertFPCondCode(Mips::CondCode CC) { +/// This function returns true if the floating point conditional branches and +/// conditional moves which use condition code CC should be inverted. +static bool invertFPCondCodeUser(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return false; @@ -828,9 +514,8 @@ static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { // Creates and returns a CMovFPT/F node. static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, DebugLoc DL) { - bool invert = invertFPCondCode((Mips::CondCode) - cast<ConstantSDNode>(Cond.getOperand(2)) - ->getSExtValue()); + ConstantSDNode *CC = cast<ConstantSDNode>(Cond.getOperand(2)); + bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue()); return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL, True.getValueType(), True, False, Cond); @@ -997,10 +682,6 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) switch (Opc) { default: break; - case ISD::ADDE: - return performADDECombine(N, DAG, DCI, Subtarget); - case ISD::SUBE: - return performSUBECombine(N, DAG, DCI, Subtarget); case ISD::SDIVREM: case ISD::UDIVREM: return performDivRemCombine(N, DAG, DCI, Subtarget); @@ -1042,32 +723,32 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::BR_JT: return lowerBR_JT(Op, DAG); - case ISD::BRCOND: return lowerBRCOND(Op, DAG); - case ISD::ConstantPool: return lowerConstantPool(Op, DAG); - case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); - case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); - case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); - case ISD::JumpTable: return lowerJumpTable(Op, DAG); - case ISD::SELECT: return lowerSELECT(Op, DAG); - case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); - case ISD::SETCC: return lowerSETCC(Op, DAG); - case ISD::VASTART: return lowerVASTART(Op, DAG); - case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); - case ISD::FABS: return lowerFABS(Op, DAG); - case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); - case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); - case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); - case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); - case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); - case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); - case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); - case ISD::LOAD: return lowerLOAD(Op, DAG); - case ISD::STORE: return lowerSTORE(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); - case ISD::ADD: return lowerADD(Op, DAG); + case ISD::BR_JT: return lowerBR_JT(Op, DAG); + case ISD::BRCOND: return lowerBRCOND(Op, DAG); + case ISD::ConstantPool: return lowerConstantPool(Op, DAG); + case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); + case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); + case ISD::JumpTable: return lowerJumpTable(Op, DAG); + case ISD::SELECT: return lowerSELECT(Op, DAG); + case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); + case ISD::SETCC: return lowerSETCC(Op, DAG); + case ISD::VASTART: return lowerVASTART(Op, DAG); + case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); + case ISD::FABS: return lowerFABS(Op, DAG); + case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); + case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); + case ISD::MEMBARRIER: return lowerMEMBARRIER(Op, DAG); + case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); + case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); + case ISD::LOAD: return lowerLOAD(Op, DAG); + case ISD::STORE: return lowerSTORE(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); + case ISD::ADD: return lowerADD(Op, DAG); } return SDValue(); } @@ -1087,358 +768,6 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) return VReg; } -// Get fp branch code (not opcode) from condition code. -static Mips::FPBranchCode getFPBranchCodeFromCond(Mips::CondCode CC) { - if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) - return Mips::BRANCH_T; - - assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) && - "Invalid CondCode."); - - return Mips::BRANCH_F; -} - -MachineBasicBlock * -MipsTargetLowering::emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ - // $bb: - // bposge32_pseudo $vr0 - // => - // $bb: - // bposge32 $tbb - // $fbb: - // li $vr2, 0 - // b $sink - // $tbb: - // li $vr1, 1 - // $sink: - // $vr0 = phi($vr2, $fbb, $vr1, $tbb) - - MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - const TargetRegisterClass *RC = &Mips::CPURegsRegClass; - DebugLoc DL = MI->getDebugLoc(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); - MachineFunction *F = BB->getParent(); - MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, FBB); - F->insert(It, TBB); - F->insert(It, Sink); - - // Transfer the remainder of BB and its successor edges to Sink. - Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - Sink->transferSuccessorsAndUpdatePHIs(BB); - - // Add successors. - BB->addSuccessor(FBB); - BB->addSuccessor(TBB); - FBB->addSuccessor(Sink); - TBB->addSuccessor(Sink); - - // Insert the real bposge32 instruction to $BB. - BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); - - // Fill $FBB. - unsigned VR2 = RegInfo.createVirtualRegister(RC); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) - .addReg(Mips::ZERO).addImm(0); - BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); - - // Fill $TBB. - unsigned VR1 = RegInfo.createVirtualRegister(RC); - BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) - .addReg(Mips::ZERO).addImm(1); - - // Insert phi function to $Sink. - BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), - MI->getOperand(0).getReg()) - .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return Sink; -} - -MachineBasicBlock *MipsTargetLowering::emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc)).addReg(MI->getOperand(3).getReg()) - .addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - -MachineBasicBlock *MipsTargetLowering::emitSelT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addReg(MI->getOperand(4).getReg()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock *MipsTargetLowering::emitSeliT16 - (unsigned Opc1, unsigned Opc2, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - BuildMI(BB, DL, TII->get(Opc2)).addReg(MI->getOperand(3).getReg()) - .addImm(MI->getOperand(4).getImm()); - BuildMI(BB, DL, TII->get(Opc1)).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; - - BuildMI(*BB, BB->begin(), DL, - TII->get(Mips::PHI), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} - - -MachineBasicBlock - *MipsTargetLowering::emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - unsigned regY = MI->getOperand(1).getReg(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -MachineBasicBlock *MipsTargetLowering::emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned regX = MI->getOperand(0).getReg(); - int64_t imm = MI->getOperand(1).getImm(); - MachineBasicBlock *target = MI->getOperand(2).getMBB(); - unsigned CmpOpc; - if (isUInt<8>(imm)) - CmpOpc = CmpiOpc; - else if (isUInt<16>(imm)) - CmpOpc = CmpiXOpc; - else - llvm_unreachable("immediate field not usable"); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(CmpOpc)).addReg(regX).addImm(imm); - BuildMI(*BB, MI, MI->getDebugLoc(), TII->get(BtOpc)).addMBB(target); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} - - -static unsigned Mips16WhichOp8uOr16simm - (unsigned shortOp, unsigned longOp, int64_t Imm) { - if (isUInt<8>(Imm)) - return shortOp; - else if (isInt<16>(Imm)) - return longOp; - else - llvm_unreachable("immediate field not usable"); -} - -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - unsigned regY = MI->getOperand(2).getReg(); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addReg(regY); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} -MachineBasicBlock *MipsTargetLowering::emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const { - if (DontExpandCondPseudos16) - return BB; - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - unsigned CC = MI->getOperand(0).getReg(); - unsigned regX = MI->getOperand(1).getReg(); - int64_t Imm = MI->getOperand(2).getImm(); - unsigned SltOpc = Mips16WhichOp8uOr16simm(SltiOpc, SltiXOpc, Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(SltOpc)).addReg(regX).addImm(Imm); - BuildMI(*BB, MI, MI->getDebugLoc(), - TII->get(Mips::MoveR3216), CC).addReg(Mips::T8); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - -} MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -1548,77 +877,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case Mips::ATOMIC_CMP_SWAP_I64: case Mips::ATOMIC_CMP_SWAP_I64_P8: return emitAtomicCmpSwap(MI, BB, 8); - case Mips::BPOSGE32_PSEUDO: - return emitBPOSGE32(MI, BB); - case Mips::SelBeqZ: - return emitSel16(Mips::BeqzRxImm16, MI, BB); - case Mips::SelBneZ: - return emitSel16(Mips::BnezRxImm16, MI, BB); - case Mips::SelTBteqZCmpi: - return emitSeliT16(Mips::BteqzX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBteqZSlti: - return emitSeliT16(Mips::BteqzX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBteqZSltiu: - return emitSeliT16(Mips::BteqzX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBtneZCmpi: - return emitSeliT16(Mips::BtnezX16, Mips::CmpiRxImmX16, MI, BB); - case Mips::SelTBtneZSlti: - return emitSeliT16(Mips::BtnezX16, Mips::SltiRxImmX16, MI, BB); - case Mips::SelTBtneZSltiu: - return emitSeliT16(Mips::BtnezX16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SelTBteqZCmp: - return emitSelT16(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBteqZSlt: - return emitSelT16(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBteqZSltu: - return emitSelT16(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::SelTBtneZCmp: - return emitSelT16(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::SelTBtneZSlt: - return emitSelT16(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::SelTBtneZSltu: - return emitSelT16(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::CmpRxRy16, MI, BB); - case Mips::BteqzT8SltX16: - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltRxRy16, MI, BB); - case Mips::BteqzT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BteqzX16, Mips::SltuRxRy16, MI, BB); - case Mips::BtnezT8CmpX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::CmpRxRy16, MI, BB); - case Mips::BtnezT8SltX16: - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltRxRy16, MI, BB); - case Mips::BtnezT8SltuX16: - // TBD: figure out a way to get this or remove the instruction - // altogether. - return emitFEXT_T8I816_ins(Mips::BtnezX16, Mips::SltuRxRy16, MI, BB); - case Mips::BteqzT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BteqzT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BteqzT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BteqzX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::BtnezT8CmpiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::CmpiRxImm16, Mips::CmpiRxImmX16, MI, BB); - case Mips::BtnezT8SltiX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::BtnezT8SltiuX16: return emitFEXT_T8I8I16_ins( - Mips::BtnezX16, Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - break; - case Mips::SltCCRxRy16: - return emitFEXT_CCRX16_ins(Mips::SltRxRy16, MI, BB); - break; - case Mips::SltiCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiRxImm16, Mips::SltiRxImmX16, MI, BB); - case Mips::SltiuCCRxImmX16: - return emitFEXT_CCRXI16_ins - (Mips::SltiuRxImm16, Mips::SltiuRxImmX16, MI, BB); - case Mips::SltuCCRxRy16: - return emitFEXT_CCRX16_ins - (Mips::SltuRxRy16, MI, BB); } } @@ -2140,8 +1398,8 @@ lowerBRCOND(SDValue Op, SelectionDAG &DAG) const SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue(); - SDValue BrCode = DAG.getConstant(getFPBranchCodeFromCond(CC), MVT::i32); - + unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T; + SDValue BrCode = DAG.getConstant(Opc, MVT::i32); return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode, Dest, CondRes); } @@ -2792,6 +2050,22 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { return CreateStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } +static SDValue initAccumulator(SDValue In, DebugLoc DL, SelectionDAG &DAG) { + SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(0, MVT::i32)); + SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, InLo, InHi); +} + +static SDValue extractLOHI(SDValue Op, DebugLoc DL, SelectionDAG &DAG) { + SDValue Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_lo, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, MVT::i32, Op, + DAG.getConstant(Mips::sub_hi, MVT::i32)); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); +} + // This function expands mips intrinsic nodes which have 64-bit input operands // or output values. // @@ -2804,48 +2078,51 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { // v1 = copy hi // out64 = merge-values (v0, v1) // -static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, - unsigned Opc, bool HasI64In, bool HasI64Out) { +static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { DebugLoc DL = Op.getDebugLoc(); bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; - SDValue Chain = HasChainIn ? Op->getOperand(0) : DAG.getEntryNode(); SmallVector<SDValue, 3> Ops; + unsigned OpNo = 0; - if (HasI64In) { - SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, - Op->getOperand(1 + HasChainIn), - DAG.getConstant(0, MVT::i32)); - SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, - Op->getOperand(1 + HasChainIn), - DAG.getConstant(1, MVT::i32)); + // See if Op has a chain input. + if (HasChainIn) + Ops.push_back(Op->getOperand(OpNo++)); - Chain = DAG.getCopyToReg(Chain, DL, Mips::LO, InLo, SDValue()); - Chain = DAG.getCopyToReg(Chain, DL, Mips::HI, InHi, Chain.getValue(1)); + // The next operand is the intrinsic opcode. + assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); - Ops.push_back(Chain); - Ops.append(Op->op_begin() + HasChainIn + 2, Op->op_end()); - Ops.push_back(Chain.getValue(1)); - } else { - Ops.push_back(Chain); - Ops.append(Op->op_begin() + HasChainIn + 1, Op->op_end()); - } + // See if the next operand has type i64. + SDValue Opnd = Op->getOperand(++OpNo), In64; + + if (Opnd.getValueType() == MVT::i64) + In64 = initAccumulator(Opnd, DL, DAG); + else + Ops.push_back(Opnd); + + // Push the remaining operands. + for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) + Ops.push_back(Op->getOperand(OpNo)); + + // Add In64 to the end of the list. + if (In64.getNode()) + Ops.push_back(In64); - if (!HasI64Out) - return DAG.getNode(Opc, DL, Op->value_begin(), Op->getNumValues(), - Ops.begin(), Ops.size()); + // Scan output. + SmallVector<EVT, 2> ResTys; - SDValue Intr = DAG.getNode(Opc, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Ops.begin(), Ops.size()); - SDValue OutLo = DAG.getCopyFromReg(Intr.getValue(0), DL, Mips::LO, MVT::i32, - Intr.getValue(1)); - SDValue OutHi = DAG.getCopyFromReg(OutLo.getValue(1), DL, Mips::HI, MVT::i32, - OutLo.getValue(2)); - SDValue Out = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, OutLo, OutHi); + for (SDNode::value_iterator I = Op->value_begin(), E = Op->value_end(); + I != E; ++I) + ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); + + // Create node. + SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); + SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; if (!HasChainIn) return Out; - SDValue Vals[] = { Out, OutHi.getValue(1) }; + assert(Val->getValueType(1) == MVT::Other); + SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; return DAG.getMergeValues(Vals, 2, DL); } @@ -2855,37 +2132,37 @@ SDValue MipsTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, default: return SDValue(); case Intrinsic::mips_shilo: - return lowerDSPIntr(Op, DAG, MipsISD::SHILO, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::SHILO); case Intrinsic::mips_dpau_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); case Intrinsic::mips_dpau_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); case Intrinsic::mips_dpsu_h_qbl: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); case Intrinsic::mips_dpsu_h_qbr: - return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); case Intrinsic::mips_dpa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); case Intrinsic::mips_dps_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); case Intrinsic::mips_dpax_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); case Intrinsic::mips_dpsx_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); case Intrinsic::mips_mulsa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); case Intrinsic::mips_mult: - return lowerDSPIntr(Op, DAG, MipsISD::MULT, false, true); + return lowerDSPIntr(Op, DAG, MipsISD::Mult); case Intrinsic::mips_multu: - return lowerDSPIntr(Op, DAG, MipsISD::MULTU, false, true); + return lowerDSPIntr(Op, DAG, MipsISD::Multu); case Intrinsic::mips_madd: - return lowerDSPIntr(Op, DAG, MipsISD::MADD_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAdd); case Intrinsic::mips_maddu: - return lowerDSPIntr(Op, DAG, MipsISD::MADDU_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAddu); case Intrinsic::mips_msub: - return lowerDSPIntr(Op, DAG, MipsISD::MSUB_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MSub); case Intrinsic::mips_msubu: - return lowerDSPIntr(Op, DAG, MipsISD::MSUBU_DSP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MSubu); } } @@ -2895,45 +2172,45 @@ SDValue MipsTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, default: return SDValue(); case Intrinsic::mips_extp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTP, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTP); case Intrinsic::mips_extpdp: - return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); case Intrinsic::mips_extr_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); case Intrinsic::mips_extr_r_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); case Intrinsic::mips_extr_rs_w: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); case Intrinsic::mips_extr_s_h: - return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H, true, false); + return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); case Intrinsic::mips_mthlip: - return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); case Intrinsic::mips_mulsaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); case Intrinsic::mips_maq_s_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); case Intrinsic::mips_maq_s_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); case Intrinsic::mips_maq_sa_w_phl: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); case Intrinsic::mips_maq_sa_w_phr: - return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); case Intrinsic::mips_dpaq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); case Intrinsic::mips_dpsq_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); case Intrinsic::mips_dpaq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); case Intrinsic::mips_dpsq_sa_l_w: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); case Intrinsic::mips_dpaqx_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); case Intrinsic::mips_dpaqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); case Intrinsic::mips_dpsqx_s_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); case Intrinsic::mips_dpsqx_sa_w_ph: - return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH, true, true); + return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); } } @@ -3074,28 +2351,6 @@ static unsigned getNextIntArgReg(unsigned Reg) { return (Reg == Mips::A0) ? Mips::A1 : Mips::A3; } -/// isEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. -bool MipsTargetLowering:: -isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const { - if (!EnableMipsTailCalls) - return false; - - // No tail call optimization for mips16. - if (Subtarget->inMips16Mode()) - return false; - - // Return false if either the callee or caller has a byval argument. - if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) - return false; - - // Return true if the callee's argument area is no larger than the - // caller's. - return NextStackOffset <= FI.getIncomingArgSize(); -} - SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, DebugLoc DL, @@ -3114,161 +2369,48 @@ MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, /*isVolatile=*/ true, false, 0); } -// -// The Mips16 hard float is a crazy quilt inherited from gcc. I have a much -// cleaner way to do all of this but it will have to wait until the traditional -// gcc mechanism is completed. -// -// For Pic, in order for Mips16 code to call Mips32 code which according the abi -// have either arguments or returned values placed in floating point registers, -// we use a set of helper functions. (This includes functions which return type -// complex which on Mips are returned in a pair of floating point registers). -// -// This is an encoding that we inherited from gcc. -// In Mips traditional O32, N32 ABI, floating point numbers are passed in -// floating point argument registers 1,2 only when the first and optionally -// the second arguments are float (sf) or double (df). -// For Mips16 we are only concerned with the situations where floating point -// arguments are being passed in floating point registers by the ABI, because -// Mips16 mode code cannot execute floating point instructions to load those -// values and hence helper functions are needed. -// The possibilities are (), (sf), (sf, sf), (sf, df), (df), (df, sf), (df, df) -// the helper function suffixs for these are: -// 0, 1, 5, 9, 2, 6, 10 -// this suffix can then be calculated as follows: -// for a given argument Arg: -// Arg1x, Arg2x = 1 : Arg is sf -// 2 : Arg is df -// 0: Arg is neither sf or df -// So this stub is the string for number Arg1x + Arg2x*4. -// However not all numbers between 0 and 10 are possible, we check anyway and -// assert if the impossible exists. -// - -unsigned int MipsTargetLowering::getMips16HelperFunctionStubNumber - (ArgListTy &Args) const { - unsigned int resultNum = 0; - if (Args.size() >= 1) { - Type *t = Args[0].Ty; - if (t->isFloatTy()) { - resultNum = 1; - } - else if (t->isDoubleTy()) { - resultNum = 2; - } - } - if (resultNum) { - if (Args.size() >=2) { - Type *t = Args[1].Ty; - if (t->isFloatTy()) { - resultNum += 4; - } - else if (t->isDoubleTy()) { - resultNum += 8; - } - } +void MipsTargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // Insert node "GP copy globalreg" before call to function. + // + // R_MIPS_CALL* operators (emitted when non-internal functions are called + // in PIC mode) allow symbols to be resolved via lazy binding. + // The lazy binding stub requires GP to point to the GOT. + if (IsPICCall && !InternalLinkage) { + unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; + EVT Ty = IsN64 ? MVT::i64 : MVT::i32; + RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } - return resultNum; -} -// -// prefixs are attached to stub numbers depending on the return type . -// return type: float sf_ -// double df_ -// single complex sc_ -// double complext dc_ -// others NO PREFIX -// -// -// The full name of a helper function is__mips16_call_stub + -// return type dependent prefix + stub number -// -// -// This is something that probably should be in a different source file and -// perhaps done differently but my main purpose is to not waste runtime -// on something that we can enumerate in the source. Another possibility is -// to have a python script to generate these mapping tables. This will do -// for now. There are a whole series of helper function mapping arrays, one -// for each return type class as outlined above. There there are 11 possible -// entries. Ones with 0 are ones which should never be selected -// -// All the arrays are similar except for ones which return neither -// sf, df, sc, dc, in which only care about ones which have sf or df as a -// first parameter. -// -#define P_ "__mips16_call_stub_" -#define MAX_STUB_NUMBER 10 -#define T1 P "1", P "2", 0, 0, P "5", P "6", 0, 0, P "9", P "10" -#define T P "0" , T1 -#define P P_ -static char const * vMips16Helper[MAX_STUB_NUMBER+1] = - {0, T1 }; -#undef P -#define P P_ "sf_" -static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "df_" -static char const * dfMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "sc_" -static char const * scMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#define P P_ "dc_" -static char const * dcMips16Helper[MAX_STUB_NUMBER+1] = - { T }; -#undef P -#undef P_ - - -const char* MipsTargetLowering:: - getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const { - const unsigned int stubNum = getMips16HelperFunctionStubNumber(Args); -#ifndef NDEBUG - const unsigned int maxStubNum = 10; - assert(stubNum <= maxStubNum); - const bool validStubNum[maxStubNum+1] = - {true, true, true, false, false, true, true, false, false, true, true}; - assert(validStubNum[stubNum]); -#endif - const char *result; - if (RetTy->isFloatTy()) { - result = sfMips16Helper[stubNum]; - } - else if (RetTy ->isDoubleTy()) { - result = dfMips16Helper[stubNum]; - } - else if (RetTy->isStructTy()) { - // check if it's complex - if (RetTy->getNumContainedTypes() == 2) { - if ((RetTy->getContainedType(0)->isFloatTy()) && - (RetTy->getContainedType(1)->isFloatTy())) { - result = scMips16Helper[stubNum]; - } - else if ((RetTy->getContainedType(0)->isDoubleTy()) && - (RetTy->getContainedType(1)->isDoubleTy())) { - result = dcMips16Helper[stubNum]; - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - llvm_unreachable("Uncovered condition"); - } - } - else { - if (stubNum == 0) { - needHelper = false; - return ""; - } - result = vMips16Helper[stubNum]; + // Build a sequence of copy-to-reg nodes chained together with token + // chain and flag operands which copy the outgoing args into registers. + // The InFlag in necessary since all emitted instructions must be + // stuck together. + SDValue InFlag; + + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } - needHelper = true; - return result; + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(CLI.DAG.getRegisterMask(Mask)); + + if (InFlag.getNode()) + Ops.push_back(InFlag); } /// LowerCall - functions arguments are copied from virtual regs to @@ -3287,26 +2429,6 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; - const char* Mips16HelperFunction = 0; - bool NeedMips16Helper = false; - - if (Subtarget->inMips16Mode() && getTargetMachine().Options.UseSoftFloat && - Mips16HardFloat) { - // - // currently we don't have symbols tagged with the mips16 or mips32 - // qualifier so we will assume that we don't know what kind it is. - // and generate the helper - // - bool LookupHelper = true; - if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - if (NoHelperNeeded.find(S->getSymbol()) != NoHelperNeeded.end()) { - LookupHelper = false; - } - } - if (LookupHelper) Mips16HelperFunction = - getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); - - } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); @@ -3466,80 +2588,17 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, GlobalOrExternal = true; } - SDValue JumpTarget = Callee; - - // T9 should contain the address of the callee function if - // -reloction-model=pic or it is an indirect call. - if (IsPICCall || !GlobalOrExternal) { - unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; - unsigned V0Reg = Mips::V0; - if (NeedMips16Helper) { - RegsToPass.push_front(std::make_pair(V0Reg, Callee)); - JumpTarget = DAG.getExternalSymbol( - Mips16HelperFunction, getPointerTy()); - JumpTarget = getAddrGlobal(JumpTarget, DAG, MipsII::MO_GOT); - } - else { - RegsToPass.push_front(std::make_pair(T9Reg, Callee)); - - if (!Subtarget->inMips16Mode()) - JumpTarget = SDValue(); - } - } - - // Insert node "GP copy globalreg" before call to function. - // - // R_MIPS_CALL* operators (emitted when non-internal functions are called - // in PIC mode) allow symbols to be resolved via lazy binding. - // The lazy binding stub requires GP to point to the GOT. - if (IsPICCall && !InternalLinkage) { - unsigned GPReg = IsN64 ? Mips::GP_64 : Mips::GP; - EVT Ty = IsN64 ? MVT::i64 : MVT::i32; - RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(DAG, Ty))); - } - - // Build a sequence of copy-to-reg nodes chained together with token - // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be - // stuck together. - SDValue InFlag; - - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // MipsJmpLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... - // - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SmallVector<SDValue, 8> Ops(1, Chain); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - if (JumpTarget.getNode()) - Ops.push_back(JumpTarget); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); - - if (InFlag.getNode()) - Ops.push_back(InFlag); + getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage, + CLI, Callee, Chain); if (IsTailCall) return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size()); Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); + SDValue InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, @@ -4124,14 +3183,14 @@ static bool isF128SoftLibCall(const char *CallSym) { const char * const *End = LibCalls + array_lengthof(LibCalls); // Check that LibCalls is sorted alphabetically. -#ifndef NDEBUG - LTStr Comp; + MipsTargetLowering::LTStr Comp; +#ifndef NDEBUG for (const char * const *I = LibCalls; I < End - 1; ++I) assert(Comp(*I, *(I + 1))); #endif - return std::binary_search(LibCalls, End, CallSym, LTStr()); + return std::binary_search(LibCalls, End, CallSym, Comp); } /// This function returns true if Ty is fp128 or i128 which was originally a diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index de925e16ab..cab71a61e0 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,6 +68,16 @@ namespace llvm { EH_RETURN, + // Node used to extract integer from accumulator. + ExtractLOHI, + + // Node used to insert integers to accumulator. + InsertLOHI, + + // Mult nodes. + Mult, + Multu, + // MAdd/Sub nodes MAdd, MAddu, @@ -77,6 +87,8 @@ namespace llvm { // DivRem(u) DivRem, DivRemU, + DivRem16, + DivRemU16, BuildPairF64, ExtractElementF64, @@ -152,9 +164,9 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + static const MipsTargetLowering *create(MipsTargetMachine &TM); - virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const; + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, @@ -177,17 +189,34 @@ namespace llvm { EVT getSetCCResultType(EVT VT) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - private: - void setMips16LibcallName(RTLIB::Libcall, const char *Name); + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + struct LTStr { + bool operator()(const char *S1, const char *S2) const { + return strcmp(S1, S2) < 0; + } + }; - void setMips16HardFloatLibCalls(); + protected: + SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const; - unsigned int - getMips16HelperFunctionStubNumber(ArgListTy &Args) const; + SDValue getAddrLocal(SDValue Op, SelectionDAG &DAG, bool HasMips64) const; - const char *getMips16HelperFunction - (Type* RetTy, ArgListTy &Args, bool &needHelper) const; + SDValue getAddrGlobal(SDValue Op, SelectionDAG &DAG, unsigned Flag) const; + + SDValue getAddrGlobalLargeGOT(SDValue Op, SelectionDAG &DAG, + unsigned HiFlag, unsigned LoFlag) const; + + /// This function fills Ops, which is the list of operands that will later + /// be used when a function call node is created. It also generates + /// copyToReg nodes to set up argument registers. + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; /// ByValArgInfo - Byval argument information. struct ByValArgInfo { @@ -283,6 +312,7 @@ namespace llvm { bool HasMips64, IsN64, IsO32; + private: // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -321,9 +351,10 @@ namespace llvm { /// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. - bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const = 0; /// copyByValArg - Copy argument registers which were used to pass a byval /// argument to the stack. Create a stack frame object for the byval @@ -377,10 +408,6 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; - // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; @@ -419,8 +446,6 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; - MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, - MachineBasicBlock *BB) const; MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI, @@ -430,29 +455,11 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; - MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitSeliT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - - MachineBasicBlock *emitSelT16(unsigned Opc1, unsigned Opc2, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I816_ins(unsigned BtOpc, unsigned CmpOpc, - MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_T8I8I16_ins( - unsigned BtOpc, unsigned CmpiOpc, unsigned CmpiXOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRX16_ins( - unsigned SltOpc, - MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitFEXT_CCRXI16_ins( - unsigned SltiOpc, unsigned SltiXOpc, - MachineInstr *MI, MachineBasicBlock *BB )const; - }; + + /// Create MipsTargetLowering objects. + const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM); + const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM); } #endif // MipsISELLOWERING_H diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 891bdc1224..6b23057c9c 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -503,32 +503,27 @@ let Predicates = [IsFP64bit, HasStdEnc] in { def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; } -// Load/Store patterns. +// Patterns for loads/stores with a reg+imm operand. let AddedComplexity = 40 in { let Predicates = [IsN64, HasStdEnc] in { - def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1_P8 addrRegImm:$a)>; - def : MipsPat<(store FGR32:$v, addrRegImm:$a), - (SWC1_P8 FGR32:$v, addrRegImm:$a)>; - def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164_P8 addrRegImm:$a)>; - def : MipsPat<(store FGR64:$v, addrRegImm:$a), - (SDC164_P8 FGR64:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LWC1_P8, f32, load>; + def : StoreRegImmPat<SWC1_P8, f32>; + def : LoadRegImmPat<LDC164_P8, f64, load>; + def : StoreRegImmPat<SDC164_P8, f64>; } let Predicates = [NotN64, HasStdEnc] in { - def : MipsPat<(f32 (load addrRegImm:$a)), (LWC1 addrRegImm:$a)>; - def : MipsPat<(store FGR32:$v, addrRegImm:$a), - (SWC1 FGR32:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LWC1, f32, load>; + def : StoreRegImmPat<SWC1, f32>; } let Predicates = [NotN64, HasMips64, HasStdEnc] in { - def : MipsPat<(f64 (load addrRegImm:$a)), (LDC164 addrRegImm:$a)>; - def : MipsPat<(store FGR64:$v, addrRegImm:$a), - (SDC164 FGR64:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LDC164, f64, load>; + def : StoreRegImmPat<SDC164, f64>; } let Predicates = [NotN64, NotMips64, HasStdEnc] in { - def : MipsPat<(f64 (load addrRegImm:$a)), (LDC1 addrRegImm:$a)>; - def : MipsPat<(store AFGR64:$v, addrRegImm:$a), - (SDC1 AFGR64:$v, addrRegImm:$a)>; + def : LoadRegImmPat<LDC1, f64, load>; + def : StoreRegImmPat<SDC1, f64>; } } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 3cd9088140..8c05d97bea 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -86,6 +86,36 @@ public: /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); + } + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); + } + + virtual void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const = 0; + + virtual void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const = 0; + protected: bool isZeroImm(const MachineOperand &op) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 25b5d240be..3a82e81713 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -23,13 +23,16 @@ def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisInt<4>]>; def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_MipsMAddMSub : SDTypeProfile<0, 4, - [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisSameAs<2, 3>]>; -def SDT_MipsDivRem : SDTypeProfile<0, 2, - [SDTCisInt<0>, - SDTCisSameAs<0, 1>]>; +def SDT_ExtractLOHI : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVT<1, untyped>, + SDTCisVT<2, i32>]>; +def SDT_InsertLOHI : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, + SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsMultDiv : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisInt<1>, + SDTCisSameAs<1, 2>]>; +def SDT_MipsMAddMSub : SDTypeProfile<1, 3, + [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, + SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsDivRem16 : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>]>; def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; @@ -82,20 +85,27 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; +// Node used to extract integer from LO/HI register. +def ExtractLOHI : SDNode<"MipsISD::ExtractLOHI", SDT_ExtractLOHI>; + +// Node used to insert 32-bit integers to LOHI register pair. +def InsertLOHI : SDNode<"MipsISD::InsertLOHI", SDT_InsertLOHI>; + +// Mult nodes. +def MipsMult : SDNode<"MipsISD::Mult", SDT_MipsMultDiv>; +def MipsMultu : SDNode<"MipsISD::Multu", SDT_MipsMultDiv>; + // MAdd*/MSub* nodes -def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; -def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; -def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; -def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub, - [SDNPOptInGlue, SDNPOutGlue]>; +def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub>; +def MipsMAddu : SDNode<"MipsISD::MAddu", SDT_MipsMAddMSub>; +def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub>; +def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub>; // DivRem(u) nodes -def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem, - [SDNPOutGlue]>; -def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, +def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsMultDiv>; +def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsMultDiv>; +def MipsDivRem16 : SDNode<"MipsISD::DivRem16", SDT_MipsDivRem16, [SDNPOutGlue]>; +def MipsDivRemU16 : SDNode<"MipsISD::DivRemU16", SDT_MipsDivRem16, [SDNPOutGlue]>; // Target constant nodes that are not part of any isel patterns and remain @@ -256,6 +266,7 @@ def mem : Operand<i32> { let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem64 : Operand<i64> { @@ -263,18 +274,21 @@ def mem64 : Operand<i64> { let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; } def mem_ea : Operand<i32> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPURegs, simm16); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } def mem_ea_64 : Operand<i64> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops CPU64Regs, simm16_64); let EncoderMethod = "getMemEncoding"; + let OperandType = "OPERAND_MEMORY"; } // size operand of ext instruction @@ -378,10 +392,9 @@ class ArithLogicI<string opstr, Operand Od, RegisterOperand RO, } // Arithmetic Multiply ADD/SUB -class MArithR<string opstr, SDPatternOperator op = null_frag, bit isComm = 0> : +class MArithR<string opstr, bit isComm = 0> : InstSE<(outs), (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt), - !strconcat(opstr, "\t$rs, $rt"), - [(op CPURegsOpnd:$rs, CPURegsOpnd:$rt, LO, HI)], IIImul, FrmR> { + !strconcat(opstr, "\t$rs, $rt"), [], IIImul, FrmR> { let Defs = [HI, LO]; let Uses = [HI, LO]; let isCommutable = isComm; @@ -427,33 +440,39 @@ class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern, // Memory Load/Store class Load<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd> : + Operand MemOpnd, ComplexPattern Addr> : InstSE<(outs RC:$rt), (ins MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(set RC:$rt, (OpNode addr:$addr))], NoItinerary, FrmI> { + [(set RC:$rt, (OpNode Addr:$addr))], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; let canFoldAsLoad = 1; + let mayLoad = 1; } class Store<string opstr, SDPatternOperator OpNode, RegisterClass RC, - Operand MemOpnd> : + Operand MemOpnd, ComplexPattern Addr> : InstSE<(outs), (ins RC:$rt, MemOpnd:$addr), !strconcat(opstr, "\t$rt, $addr"), - [(OpNode RC:$rt, addr:$addr)], NoItinerary, FrmI> { + [(OpNode RC:$rt, Addr:$addr)], NoItinerary, FrmI> { let DecoderMethod = "DecodeMem"; + let mayStore = 1; } multiclass LoadM<string opstr, RegisterClass RC, - SDPatternOperator OpNode = null_frag> { - def NAME : Load<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Load<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> { + SDPatternOperator OpNode = null_frag, + ComplexPattern Addr = addr> { + def NAME : Load<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Load<opstr, OpNode, RC, mem64, Addr>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } } multiclass StoreM<string opstr, RegisterClass RC, - SDPatternOperator OpNode = null_frag> { - def NAME : Store<opstr, OpNode, RC, mem>, Requires<[NotN64, HasStdEnc]>; - def _P8 : Store<opstr, OpNode, RC, mem64>, Requires<[IsN64, HasStdEnc]> { + SDPatternOperator OpNode = null_frag, + ComplexPattern Addr = addr> { + def NAME : Store<opstr, OpNode, RC, mem, Addr>, Requires<[NotN64, HasStdEnc]>; + def _P8 : Store<opstr, OpNode, RC, mem64, Addr>, + Requires<[IsN64, HasStdEnc]> { let DecoderNamespace = "Mips64"; let isCodeGenOnly = 1; } @@ -623,11 +642,34 @@ class Mult<string opstr, InstrItinClass itin, RegisterOperand RO, let neverHasSideEffects = 1; } -class Div<SDNode op, string opstr, InstrItinClass itin, RegisterOperand RO, +// Pseudo multiply/divide instruction with explicit accumulator register +// operands. +class MultDivPseudo<Instruction RealInst, RegisterClass R0, RegisterOperand R1, + SDPatternOperator OpNode, InstrItinClass Itin, + bit IsComm = 1, bit HasSideEffects = 0> : + PseudoSE<(outs R0:$ac), (ins R1:$rs, R1:$rt), + [(set R0:$ac, (OpNode R1:$rs, R1:$rt))], Itin>, + PseudoInstExpansion<(RealInst R1:$rs, R1:$rt)> { + let isCommutable = IsComm; + let hasSideEffects = HasSideEffects; +} + +// Pseudo multiply add/sub instruction with explicit accumulator register +// operands. +class MAddSubPseudo<Instruction RealInst, SDPatternOperator OpNode> + : PseudoSE<(outs ACRegs:$ac), + (ins CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin), + [(set ACRegs:$ac, + (OpNode CPURegsOpnd:$rs, CPURegsOpnd:$rt, ACRegs:$acin))], + IIImul>, + PseudoInstExpansion<(RealInst CPURegsOpnd:$rs, CPURegsOpnd:$rt)> { + string Constraints = "$acin = $ac"; +} + +class Div<string opstr, InstrItinClass itin, RegisterOperand RO, list<Register> DefRegs> : - InstSE<(outs), (ins RO:$rs, RO:$rt), - !strconcat(opstr, "\t$$zero, $rs, $rt"), [(op RO:$rs, RO:$rt)], itin, - FrmR> { + InstSE<(outs), (ins RO:$rs, RO:$rt), !strconcat(opstr, "\t$$zero, $rs, $rt"), + [], itin, FrmR> { let Defs = DefRegs; } @@ -790,6 +832,14 @@ let usesCustomInserter = 1 in { defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32>; } +/// Pseudo instructions for loading, storing and copying accumulator registers. +let isPseudo = 1 in { + defm LOAD_AC64 : LoadM<"load_ac64", ACRegs>; + defm STORE_AC64 : StoreM<"store_ac64", ACRegs>; +} + +def COPY_AC64 : PseudoSE<(outs ACRegs:$dst), (ins ACRegs:$src), []>; + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -845,10 +895,10 @@ let Predicates = [HasMips32r2, HasStdEnc] in { /// Load and Store Instructions /// aligned defm LB : LoadM<"lb", CPURegs, sextloadi8>, LW_FM<0x20>; -defm LBu : LoadM<"lbu", CPURegs, zextloadi8>, LW_FM<0x24>; -defm LH : LoadM<"lh", CPURegs, sextloadi16>, LW_FM<0x21>; +defm LBu : LoadM<"lbu", CPURegs, zextloadi8, addrDefault>, LW_FM<0x24>; +defm LH : LoadM<"lh", CPURegs, sextloadi16, addrDefault>, LW_FM<0x21>; defm LHu : LoadM<"lhu", CPURegs, zextloadi16>, LW_FM<0x25>; -defm LW : LoadM<"lw", CPURegs, load>, LW_FM<0x23>; +defm LW : LoadM<"lw", CPURegs, load, addrDefault>, LW_FM<0x23>; defm SB : StoreM<"sb", CPURegs, truncstorei8>, LW_FM<0x28>; defm SH : StoreM<"sh", CPURegs, truncstorei16>, LW_FM<0x29>; defm SW : StoreM<"sw", CPURegs, store>, LW_FM<0x2b>; @@ -920,10 +970,13 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { /// Multiply and Divide Instructions. def MULT : Mult<"mult", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x18>; def MULTu : Mult<"multu", IIImul, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x19>; -def SDIV : Div<MipsDivRem, "div", IIIdiv, CPURegsOpnd, [HI, LO]>, - MULT_FM<0, 0x1a>; -def UDIV : Div<MipsDivRemU, "divu", IIIdiv, CPURegsOpnd, [HI, LO]>, - MULT_FM<0, 0x1b>; +def PseudoMULT : MultDivPseudo<MULT, ACRegs, CPURegsOpnd, MipsMult, IIImul>; +def PseudoMULTu : MultDivPseudo<MULTu, ACRegs, CPURegsOpnd, MipsMultu, IIImul>; +def SDIV : Div<"div", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1a>; +def UDIV : Div<"divu", IIIdiv, CPURegsOpnd, [HI, LO]>, MULT_FM<0, 0x1b>; +def PseudoSDIV : MultDivPseudo<SDIV, ACRegs, CPURegsOpnd, MipsDivRem, IIIdiv, 0>; +def PseudoUDIV : MultDivPseudo<UDIV, ACRegs, CPURegsOpnd, MipsDivRemU, IIIdiv, + 0>; def MTHI : MoveToLOHI<"mthi", CPURegs, [HI]>, MTLO_FM<0x11>; def MTLO : MoveToLOHI<"mtlo", CPURegs, [LO]>, MTLO_FM<0x13>; @@ -951,10 +1004,14 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; def LEA_ADDiu : EffectiveAddress<"addiu", CPURegs, mem_ea>, LW_FM<9>; // MADD*/MSUB* -def MADD : MArithR<"madd", MipsMAdd, 1>, MULT_FM<0x1c, 0>; -def MADDU : MArithR<"maddu", MipsMAddu, 1>, MULT_FM<0x1c, 1>; -def MSUB : MArithR<"msub", MipsMSub>, MULT_FM<0x1c, 4>; -def MSUBU : MArithR<"msubu", MipsMSubu>, MULT_FM<0x1c, 5>; +def MADD : MArithR<"madd", 1>, MULT_FM<0x1c, 0>; +def MADDU : MArithR<"maddu", 1>, MULT_FM<0x1c, 1>; +def MSUB : MArithR<"msub">, MULT_FM<0x1c, 4>; +def MSUBU : MArithR<"msubu">, MULT_FM<0x1c, 5>; +def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd>; +def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu>; +def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub>; +def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu>; def RDHWR : ReadHardware<CPURegs, HWRegsOpnd>, RDHWR_FM; @@ -997,6 +1054,9 @@ def : InstAlias<"and $rs, $rt, $imm", def : InstAlias<"j $rs", (JR CPURegs:$rs), 0>, Requires<[NotMips64]>; def : InstAlias<"jalr $rs", (JALR RA, CPURegs:$rs)>, Requires<[NotMips64]>; +def : InstAlias<"jal $rs", (JALR RA, CPURegs:$rs), 0>, Requires<[NotMips64]>; +def : InstAlias<"jal $rd,$rs", (JALR CPURegs:$rd, CPURegs:$rs), 0>, + Requires<[NotMips64]>; def : InstAlias<"not $rt, $rs", (NOR CPURegsOpnd:$rt, CPURegsOpnd:$rs, ZERO), 1>; def : InstAlias<"neg $rt, $rs", @@ -1008,6 +1068,9 @@ def : InstAlias<"slt $rs, $rt, $imm", def : InstAlias<"xor $rs, $rt, $imm", (XORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, Requires<[NotMips64]>; +def : InstAlias<"or $rs, $rt, $imm", + (ORi CPURegsOpnd:$rs, CPURegsOpnd:$rt, simm16:$imm), 0>, + Requires<[NotMips64]>; def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : InstAlias<"mfc0 $rt, $rd", (MFC0_3OP CPURegsOpnd:$rt, CPURegsOpnd:$rd, 0), 0>; @@ -1043,6 +1106,13 @@ def LoadAddr32Imm : LoadAddressImm<"la", shamt,CPURegsOpnd>; // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// +// Load/store pattern templates. +class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> : + MipsPat<(ValTy (Node addrRegImm:$a)), (LoadInst addrRegImm:$a)>; + +class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> : + MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>; + // Small immediates def : MipsPat<(i32 immSExt16:$in), (ADDiu ZERO, imm:$in)>; @@ -1220,6 +1290,24 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>; // bswap pattern def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>; +// mflo/hi patterns. +def : MipsPat<(i32 (ExtractLOHI ACRegs:$ac, imm:$lohi_idx)), + (EXTRACT_SUBREG ACRegs:$ac, imm:$lohi_idx)>; + +// Load halfword/word patterns. +let AddedComplexity = 40 in { + let Predicates = [NotN64, HasStdEnc] in { + def : LoadRegImmPat<LBu, i32, zextloadi8>; + def : LoadRegImmPat<LH, i32, sextloadi16>; + def : LoadRegImmPat<LW, i32, load>; + } + let Predicates = [IsN64, HasStdEnc] in { + def : LoadRegImmPat<LBu_P8, i32, zextloadi8>; + def : LoadRegImmPat<LH_P8, i32, sextloadi16>; + def : LoadRegImmPat<LW_P8, i32, load>; + } +} + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 13b2a6ac17..5ed5124139 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -58,7 +58,8 @@ public: int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; /// Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; @@ -67,6 +68,9 @@ public: unsigned getEHExceptionRegister() const; unsigned getEHHandlerRegister() const; + /// \brief Return GPR register class. + virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index f93dd86c17..64458bcef7 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -18,6 +18,10 @@ def sub_lo : SubRegIndex; def sub_hi : SubRegIndex; } +class Unallocatable { + bit isAllocatable = 0; +} + // We have banks of 32 registers each. class MipsReg<bits<16> Enc, string n> : Register<n> { let HWEncoding = Enc; @@ -54,6 +58,13 @@ class AFPR64<bits<16> Enc, string n, list<Register> subregs> let SubRegIndices = [sub_32]; } +// Accumulator Registers +class ACC<bits<16> Enc, string n, list<Register> subregs> + : MipsRegWithSubRegs<Enc, n, subregs> { + let SubRegIndices = [sub_lo, sub_hi]; + let CoveredBySubRegs = 1; +} + // Mips Hardware Registers class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>; @@ -219,7 +230,13 @@ let Namespace = "Mips" in { // Hi/Lo registers def HI : Register<"hi">, DwarfRegNum<[64]>; + def HI1 : Register<"hi1">, DwarfRegNum<[176]>; + def HI2 : Register<"hi2">, DwarfRegNum<[178]>; + def HI3 : Register<"hi3">, DwarfRegNum<[180]>; def LO : Register<"lo">, DwarfRegNum<[65]>; + def LO1 : Register<"lo1">, DwarfRegNum<[177]>; + def LO2 : Register<"lo2">, DwarfRegNum<[179]>; + def LO3 : Register<"lo3">, DwarfRegNum<[181]>; let SubRegIndices = [sub_32] in { def HI64 : RegisterWithSubRegs<"hi", [HI]>; @@ -240,11 +257,12 @@ let Namespace = "Mips" in { def HWR29_64 : MipsReg<29, "29">; // Accum registers - let SubRegIndices = [sub_lo, sub_hi] in - def AC0 : MipsRegWithSubRegs<0, "ac0", [LO, HI]>; - def AC1 : MipsReg<1, "ac1">; - def AC2 : MipsReg<2, "ac2">; - def AC3 : MipsReg<3, "ac3">; + def AC0 : ACC<0, "ac0", [LO, HI]>; + def AC1 : ACC<1, "ac1", [LO1, HI1]>; + def AC2 : ACC<2, "ac2", [LO2, HI2]>; + def AC3 : ACC<3, "ac3", [LO3, HI3]>; + + def AC0_64 : ACC<0, "ac0", [LO64, HI64]>; def DSPCtrl : Register<"dspctrl">; } @@ -291,9 +309,9 @@ def CPU16Regs : RegisterClass<"Mips", [i32], 32, (add // Callee save S0, S1)>; -def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>; +def CPURAReg : RegisterClass<"Mips", [i32], 32, (add RA)>, Unallocatable; -def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>; +def CPUSPReg : RegisterClass<"Mips", [i32], 32, (add SP)>, Unallocatable; // 64bit fp: // * FGR64 - 32 64-bit registers @@ -319,18 +337,28 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; // Condition Register for floating point operations -def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>; +def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31,FCC0)>, Unallocatable; // Hi/Lo Registers -def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>; -def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>; +def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>, Unallocatable; +def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)>, Unallocatable; // Hardware registers -def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>; -def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>; +def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>, Unallocatable; +def HWRegs64 : RegisterClass<"Mips", [i64], 64, (add HWR29_64)>, Unallocatable; // Accumulator Registers -def ACRegs : RegisterClass<"Mips", [i64], 64, (sequence "AC%u", 0, 3)>; +def ACRegs : RegisterClass<"Mips", [untyped], 64, (add AC0)> { + let Size = 64; +} + +def ACRegs128 : RegisterClass<"Mips", [untyped], 128, (add AC0_64)> { + let Size = 128; +} + +def ACRegsDSP : RegisterClass<"Mips", [untyped], 64, (sequence "AC%u", 0, 3)> { + let Size = 64; +} def CPURegsAsmOperand : AsmOperandClass { let Name = "CPURegsAsm"; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 0dd671376f..68ec921888 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -29,6 +29,155 @@ using namespace llvm; +namespace { +typedef MachineBasicBlock::iterator Iter; + +/// Helper class to expand accumulator pseudos. +class ExpandACCPseudo { +public: + ExpandACCPseudo(MachineFunction &MF); + bool expand(); + +private: + bool expandInstr(MachineBasicBlock &MBB, Iter I); + void expandLoad(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandStore(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + void expandCopy(MachineBasicBlock &MBB, Iter I, unsigned RegSize); + + MachineFunction &MF; + const MipsSEInstrInfo &TII; + const MipsRegisterInfo &RegInfo; + MachineRegisterInfo &MRI; +}; +} + +ExpandACCPseudo::ExpandACCPseudo(MachineFunction &MF_) + : MF(MF_), + TII(*static_cast<const MipsSEInstrInfo*>(MF.getTarget().getInstrInfo())), + RegInfo(TII.getRegisterInfo()), MRI(MF.getRegInfo()) {} + +bool ExpandACCPseudo::expand() { + bool Expanded = false; + + for (MachineFunction::iterator BB = MF.begin(), BBEnd = MF.end(); + BB != BBEnd; ++BB) + for (Iter I = BB->begin(), End = BB->end(); I != End;) + Expanded |= expandInstr(*BB, I++); + + return Expanded; +} + +bool ExpandACCPseudo::expandInstr(MachineBasicBlock &MBB, Iter I) { + switch(I->getOpcode()) { + case Mips::LOAD_AC64: + case Mips::LOAD_AC64_P8: + case Mips::LOAD_AC_DSP: + case Mips::LOAD_AC_DSP_P8: + expandLoad(MBB, I, 4); + break; + case Mips::LOAD_AC128: + case Mips::LOAD_AC128_P8: + expandLoad(MBB, I, 8); + break; + case Mips::STORE_AC64: + case Mips::STORE_AC64_P8: + case Mips::STORE_AC_DSP: + case Mips::STORE_AC_DSP_P8: + expandStore(MBB, I, 4); + break; + case Mips::STORE_AC128: + case Mips::STORE_AC128_P8: + expandStore(MBB, I, 8); + break; + case Mips::COPY_AC64: + case Mips::COPY_AC_DSP: + expandCopy(MBB, I, 4); + break; + case Mips::COPY_AC128: + expandCopy(MBB, I, 8); + break; + default: + return false; + } + + MBB.erase(I); + return true; +} + +void ExpandACCPseudo::expandLoad(MachineBasicBlock &MBB, Iter I, + unsigned RegSize) { + // load $vr0, FI + // copy lo, $vr0 + // load $vr1, FI + 4 + // copy hi, $vr1 + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); + unsigned VR0 = MRI.createVirtualRegister(RC); + unsigned VR1 = MRI.createVirtualRegister(RC); + unsigned Dst = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + unsigned Lo = RegInfo.getSubReg(Dst, Mips::sub_lo); + unsigned Hi = RegInfo.getSubReg(Dst, Mips::sub_hi); + DebugLoc DL = I->getDebugLoc(); + const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY); + + TII.loadRegFromStack(MBB, I, VR0, FI, RC, &RegInfo, 0); + BuildMI(MBB, I, DL, Desc, Lo).addReg(VR0, RegState::Kill); + TII.loadRegFromStack(MBB, I, VR1, FI, RC, &RegInfo, RegSize); + BuildMI(MBB, I, DL, Desc, Hi).addReg(VR1, RegState::Kill); +} + +void ExpandACCPseudo::expandStore(MachineBasicBlock &MBB, Iter I, + unsigned RegSize) { + // copy $vr0, lo + // store $vr0, FI + // copy $vr1, hi + // store $vr1, FI + 4 + + assert(I->getOperand(0).isReg() && I->getOperand(1).isFI()); + + const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); + unsigned VR0 = MRI.createVirtualRegister(RC); + unsigned VR1 = MRI.createVirtualRegister(RC); + unsigned Src = I->getOperand(0).getReg(), FI = I->getOperand(1).getIndex(); + unsigned SrcKill = getKillRegState(I->getOperand(0).isKill()); + unsigned Lo = RegInfo.getSubReg(Src, Mips::sub_lo); + unsigned Hi = RegInfo.getSubReg(Src, Mips::sub_hi); + DebugLoc DL = I->getDebugLoc(); + + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(Lo, SrcKill); + TII.storeRegToStack(MBB, I, VR0, true, FI, RC, &RegInfo, 0); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(Hi, SrcKill); + TII.storeRegToStack(MBB, I, VR1, true, FI, RC, &RegInfo, RegSize); +} + +void ExpandACCPseudo::expandCopy(MachineBasicBlock &MBB, Iter I, + unsigned RegSize) { + // copy $vr0, src_lo + // copy dst_lo, $vr0 + // copy $vr1, src_hi + // copy dst_hi, $vr1 + + const TargetRegisterClass *RC = RegInfo.intRegClass(RegSize); + unsigned VR0 = MRI.createVirtualRegister(RC); + unsigned VR1 = MRI.createVirtualRegister(RC); + unsigned Dst = I->getOperand(0).getReg(), Src = I->getOperand(1).getReg(); + unsigned SrcKill = getKillRegState(I->getOperand(1).isKill()); + unsigned DstLo = RegInfo.getSubReg(Dst, Mips::sub_lo); + unsigned DstHi = RegInfo.getSubReg(Dst, Mips::sub_hi); + unsigned SrcLo = RegInfo.getSubReg(Src, Mips::sub_lo); + unsigned SrcHi = RegInfo.getSubReg(Src, Mips::sub_hi); + DebugLoc DL = I->getDebugLoc(); + + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR0).addReg(SrcLo, SrcKill); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstLo) + .addReg(VR0, RegState::Kill); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), VR1).addReg(SrcHi, SrcKill); + BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), DstHi) + .addReg(VR1, RegState::Kill); +} + unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { static const unsigned EhDataReg[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 @@ -246,7 +395,10 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // Reserve call frame if the size of the maximum call frame fits into 16-bit // immediate field and there are no variable sized objects on the stack. - return isInt<16>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects(); + // Make sure the second register scavenger spill slot can be accessed with one + // instruction. + return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) && + !MFI->hasVarSizedObjects(); } // Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions @@ -284,6 +436,18 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (MipsFI->callsEhReturn()) MipsFI->createEhDataRegsFI(); + // Expand pseudo instructions which load, store or copy accumulators. + // Add an emergency spill slot if a pseudo was expanded. + if (ExpandACCPseudo(MF).expand()) { + // The spill slot should be half the size of the accumulator. If target is + // mips64, it should be 64-bit, otherwise it should be 32-bt. + const TargetRegisterClass *RC = STI.hasMips64() ? + &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; + int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), + RC->getAlignment(), false); + RS->addScavengingFrameIndex(FI); + } + // Set scavenging frame index if necessary. uint64_t MaxSPOffset = MF.getInfo<MipsFunctionInfo>()->getIncomingArgSize() + estimateStackSize(MF); @@ -295,7 +459,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass; int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(), RC->getAlignment(), false); - RS->setScavengingFrameIndex(FI); + RS->addScavengingFrameIndex(FI); } const MipsFrameLowering * diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 7becd25455..193a66cc65 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -21,7 +21,7 @@ namespace llvm { class MipsSEFrameLowering : public MipsFrameLowering { public: explicit MipsSEFrameLowering(const MipsSubtarget &STI) - : MipsFrameLowering(STI) {} + : MipsFrameLowering(STI, STI.hasMips64() ? 16 : 8) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp new file mode 100644 index 0000000000..d6d220750c --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -0,0 +1,473 @@ +//===-- MipsSEISelDAGToDAG.cpp - A Dag to Dag Inst Selector for MipsSE ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-isel" +#include "MipsSEISelDAGToDAG.h" +#include "Mips.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MipsAnalyzeImmediate.h" +#include "MipsMachineFunction.h" +#include "MipsRegisterInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + + +bool MipsSEDAGToDAGISel::replaceUsesWithZeroReg(MachineRegisterInfo *MRI, + const MachineInstr& MI) { + unsigned DstReg = 0, ZeroReg = 0; + + // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0". + if ((MI.getOpcode() == Mips::ADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO; + } else if ((MI.getOpcode() == Mips::DADDiu) && + (MI.getOperand(1).getReg() == Mips::ZERO_64) && + (MI.getOperand(2).getImm() == 0)) { + DstReg = MI.getOperand(0).getReg(); + ZeroReg = Mips::ZERO_64; + } + + if (!DstReg) + return false; + + // Replace uses with ZeroReg. + for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg), + E = MRI->use_end(); U != E;) { + MachineOperand &MO = U.getOperand(); + unsigned OpNo = U.getOperandNo(); + MachineInstr *MI = MO.getParent(); + ++U; + + // Do not replace if it is a phi's operand or is tied to def operand. + if (MI->isPHI() || MI->isRegTiedToDefOperand(OpNo) || MI->isPseudo()) + continue; + + MO.setReg(ZeroReg); + } + + return true; +} + +void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + + if (!MipsFI->globalBaseRegSet()) + return; + + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator I = MBB.begin(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); + const TargetRegisterClass *RC; + + if (Subtarget.isABI_N64()) + RC = (const TargetRegisterClass*)&Mips::CPU64RegsRegClass; + else + RC = (const TargetRegisterClass*)&Mips::CPURegsRegClass; + + V0 = RegInfo.createVirtualRegister(RC); + V1 = RegInfo.createVirtualRegister(RC); + + if (Subtarget.isABI_N64()) { + MF.getRegInfo().addLiveIn(Mips::T9_64); + MBB.addLiveIn(Mips::T9_64); + + // lui $v0, %hi(%neg(%gp_rel(fname))) + // daddu $v1, $v0, $t9 + // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0) + .addReg(Mips::T9_64); + BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + if (MF.getTarget().getRelocationModel() == Reloc::Static) { + // Set global register to __gnu_local_gp. + // + // lui $v0, %hi(__gnu_local_gp) + // addiu $globalbasereg, $v0, %lo(__gnu_local_gp) + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0) + .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO); + return; + } + + MF.getRegInfo().addLiveIn(Mips::T9); + MBB.addLiveIn(Mips::T9); + + if (Subtarget.isABI_N32()) { + // lui $v0, %hi(%neg(%gp_rel(fname))) + // addu $v1, $v0, $t9 + // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9); + BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + return; + } + + assert(Subtarget.isABI_O32()); + + // For O32 ABI, the following instruction sequence is emitted to initialize + // the global base register: + // + // 0. lui $2, %hi(_gp_disp) + // 1. addiu $2, $2, %lo(_gp_disp) + // 2. addu $globalbasereg, $2, $t9 + // + // We emit only the last instruction here. + // + // GNU linker requires that the first two instructions appear at the beginning + // of a function and no instructions be inserted before or between them. + // The two instructions are emitted during lowering to MC layer in order to + // avoid any reordering. + // + // Register $2 (Mips::V0) is added to the list of live-in registers to ensure + // the value instruction 1 (addiu) defines is valid when instruction 2 (addu) + // reads it. + MF.getRegInfo().addLiveIn(Mips::V0); + MBB.addLiveIn(Mips::V0); + BuildMI(MBB, I, DL, TII.get(Mips::ADDu), GlobalBaseReg) + .addReg(Mips::V0).addReg(Mips::T9); +} + +void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) { + initGlobalBaseReg(MF); + + MachineRegisterInfo *MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE; + ++MFI) + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) + replaceUsesWithZeroReg(MRI, *I); +} + +/// Select multiply instructions. +std::pair<SDNode*, SDNode*> +MipsSEDAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, DebugLoc DL, EVT Ty, + bool HasLo, bool HasHi) { + SDNode *Lo = 0, *Hi = 0; + SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), + N->getOperand(1)); + SDValue InFlag = SDValue(Mul, 0); + + if (HasLo) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64); + Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag); + InFlag = SDValue(Lo, 1); + } + if (HasHi) { + unsigned Opcode = (Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64); + Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag); + } + return std::make_pair(Lo, Hi); +} + +SDNode *MipsSEDAGToDAGISel::selectAddESubE(unsigned MOp, SDValue InFlag, + SDValue CmpLHS, DebugLoc DL, + SDNode *Node) const { + unsigned Opc = InFlag.getOpcode(); (void)Opc; + + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); + + SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) }; + SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + + SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, DL, VT, Ops, 2); + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, DL, VT, + SDValue(Carry, 0), RHS); + return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, LHS, + SDValue(AddCarry, 0)); +} + +/// ComplexPattern used on MipsInstrInfo +/// Used on Mips Load/Store instructions +bool MipsSEDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + EVT ValTy = Addr.getValueType(); + + // if Address is FI, get the TargetFrameIndex. + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + Offset = CurDAG->getTargetConstant(0, ValTy); + return true; + } + + // on PIC code Load GA + if (Addr.getOpcode() == MipsISD::Wrapper) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (TM.getRelocationModel() != Reloc::PIC_) { + if ((Addr.getOpcode() == ISD::TargetExternalSymbol || + Addr.getOpcode() == ISD::TargetGlobalAddress)) + return false; + } + + // Addresses of the form FI+const or FI|const + if (CurDAG->isBaseWithConstantOffset(Addr)) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)); + if (isInt<16>(CN->getSExtValue())) { + + // If the first operand is a FI, get the TargetFI Node + if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode> + (Addr.getOperand(0))) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy); + else + Base = Addr.getOperand(0); + + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), ValTy); + return true; + } + } + + // Operand is a result from an ADD. + if (Addr.getOpcode() == ISD::ADD) { + // When loading from constant pools, load the lower address part in + // the instruction itself. Example, instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(1).getOpcode() == MipsISD::Lo || + Addr.getOperand(1).getOpcode() == MipsISD::GPRel) { + SDValue Opnd0 = Addr.getOperand(1).getOperand(0); + if (isa<ConstantPoolSDNode>(Opnd0) || isa<GlobalAddressSDNode>(Opnd0) || + isa<JumpTableSDNode>(Opnd0)) { + Base = Addr.getOperand(0); + Offset = Opnd0; + return true; + } + } + } + + return false; +} + +bool MipsSEDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + Base = Addr; + Offset = CurDAG->getTargetConstant(0, Addr.getValueType()); + return true; +} + +bool MipsSEDAGToDAGISel::selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const { + return selectAddrRegImm(Addr, Base, Offset) || + selectAddrDefault(Addr, Base, Offset); +} + +std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { + unsigned Opcode = Node->getOpcode(); + DebugLoc DL = Node->getDebugLoc(); + + /// + // Instruction Selection not handled by the auto-generated + // tablegen selection should be handled here. + /// + EVT NodeTy = Node->getValueType(0); + SDNode *Result; + unsigned MultOpc; + + switch(Opcode) { + default: break; + + case ISD::SUBE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::SUBu, InFlag, InFlag.getOperand(0), DL, Node); + return std::make_pair(true, Result); + } + + case ISD::ADDE: { + SDValue InFlag = Node->getOperand(2); + Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); + return std::make_pair(true, Result); + } + + /// Mul with two results + case ISD::SMUL_LOHI: + case ISD::UMUL_LOHI: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT); + + std::pair<SDNode*, SDNode*> LoHi = selectMULT(Node, MultOpc, DL, NodeTy, + true, true); + + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0)); + + if (!SDValue(Node, 1).use_empty()) + ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); + + return std::make_pair(true, (SDNode*)NULL); + } + + /// Special Muls + case ISD::MUL: { + // Mips32 has a 32-bit three operand mul instruction. + if (Subtarget.hasMips32() && NodeTy == MVT::i32) + break; + MultOpc = NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT; + Result = selectMULT(Node, MultOpc, DL, NodeTy, true, false).first; + return std::make_pair(true, Result); + } + case ISD::MULHS: + case ISD::MULHU: { + if (NodeTy == MVT::i32) + MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); + else + MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT); + + Result = selectMULT(Node, MultOpc, DL, NodeTy, false, true).second; + return std::make_pair(true, Result); + } + + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + if (Subtarget.hasMips64()) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO_64, MVT::i64); + Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); + } else { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, + Mips::ZERO, MVT::i32); + Result = CurDAG->getMachineNode(Mips::BuildPairF64, DL, MVT::f64, Zero, + Zero); + } + + return std::make_pair(true, Result); + } + break; + } + + case ISD::Constant: { + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node); + unsigned Size = CN->getValueSizeInBits(0); + + if (Size == 32) + break; + + MipsAnalyzeImmediate AnalyzeImm; + int64_t Imm = CN->getSExtValue(); + + const MipsAnalyzeImmediate::InstSeq &Seq = + AnalyzeImm.Analyze(Imm, Size, false); + + MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin(); + DebugLoc DL = CN->getDebugLoc(); + SDNode *RegOpnd; + SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + + // The first instruction can be a LUi which is different from other + // instructions (ADDiu, ORI and SLL) in that it does not have a register + // operand. + if (Inst->Opc == Mips::LUi64) + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd); + else + RegOpnd = + CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + CurDAG->getRegister(Mips::ZERO_64, MVT::i64), + ImmOpnd); + + // The remaining instructions in the sequence are handled here. + for (++Inst; Inst != Seq.end(); ++Inst) { + ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd), + MVT::i64); + RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, + SDValue(RegOpnd, 0), ImmOpnd); + } + + return std::make_pair(true, RegOpnd); + } + + case MipsISD::ThreadPointer: { + EVT PtrVT = TLI.getPointerTy(); + unsigned RdhwrOpc, SrcReg, DestReg; + + if (PtrVT == MVT::i32) { + RdhwrOpc = Mips::RDHWR; + SrcReg = Mips::HWR29; + DestReg = Mips::V1; + } else { + RdhwrOpc = Mips::RDHWR64; + SrcReg = Mips::HWR29_64; + DestReg = Mips::V1_64; + } + + SDNode *Rdhwr = + CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(), + Node->getValueType(0), + CurDAG->getRegister(SrcReg, PtrVT)); + SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg, + SDValue(Rdhwr, 0)); + SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT); + ReplaceUses(SDValue(Node, 0), ResNode); + return std::make_pair(true, ResNode.getNode()); + } + + case MipsISD::InsertLOHI: { + unsigned RCID = Subtarget.hasDSP() ? Mips::ACRegsDSPRegClassID : + Mips::ACRegsRegClassID; + SDValue RegClass = CurDAG->getTargetConstant(RCID, MVT::i32); + SDValue LoIdx = CurDAG->getTargetConstant(Mips::sub_lo, MVT::i32); + SDValue HiIdx = CurDAG->getTargetConstant(Mips::sub_hi, MVT::i32); + const SDValue Ops[] = { RegClass, Node->getOperand(0), LoIdx, + Node->getOperand(1), HiIdx }; + SDNode *Res = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, + MVT::Untyped, Ops, 5); + return std::make_pair(true, Res); + } + } + + return std::make_pair(false, (SDNode*)NULL); +} + +FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { + return new MipsSEDAGToDAGISel(TM); +} diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h new file mode 100644 index 0000000000..6137ab040b --- /dev/null +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -0,0 +1,57 @@ +//===-- MipsSEISelDAGToDAG.h - A Dag to Dag Inst Selector for MipsSE -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsDAGToDAGISel specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSEISELDAGTODAG_H +#define MIPSSEISELDAGTODAG_H + +#include "MipsISelDAGToDAG.h" + +namespace llvm { + +class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { + +public: + explicit MipsSEDAGToDAGISel(MipsTargetMachine &TM) : MipsDAGToDAGISel(TM) {} + +private: + bool replaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&); + + std::pair<SDNode*, SDNode*> selectMULT(SDNode *N, unsigned Opc, DebugLoc dl, + EVT Ty, bool HasLo, bool HasHi); + + SDNode *selectAddESubE(unsigned MOp, SDValue InFlag, SDValue CmpLHS, + DebugLoc DL, SDNode *Node) const; + + virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const; + + virtual std::pair<bool, SDNode*> selectNode(SDNode *Node); + + virtual void processFunctionAfterISel(MachineFunction &MF); + + // Insert instructions to initialize the global base register in the + // first MBB of the function. + void initGlobalBaseReg(MachineFunction &MF); +}; + +FunctionPass *createMipsSEISelDag(MipsTargetMachine &TM); + +} + +#endif diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp new file mode 100644 index 0000000000..4f219218d3 --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -0,0 +1,442 @@ +//===-- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// +#include "MipsSEISelLowering.h" +#include "MipsRegisterInfo.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<bool> +EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, + cl::desc("MIPS: Enable tail calls."), cl::init(false)); + +MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) + : MipsTargetLowering(TM) { + // Set up the register classes + addRegisterClass(MVT::i32, &Mips::CPURegsRegClass); + + if (HasMips64) + addRegisterClass(MVT::i64, &Mips::CPU64RegsRegClass); + + if (Subtarget->hasDSP()) { + MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; + + for (unsigned i = 0; i < array_lengthof(VecTys); ++i) { + addRegisterClass(VecTys[i], &Mips::DSPRegsRegClass); + + // Expand all builtin opcodes. + for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) + setOperationAction(Opc, VecTys[i], Expand); + + setOperationAction(ISD::LOAD, VecTys[i], Legal); + setOperationAction(ISD::STORE, VecTys[i], Legal); + setOperationAction(ISD::BITCAST, VecTys[i], Legal); + } + } + + if (!TM.Options.UseSoftFloat) { + addRegisterClass(MVT::f32, &Mips::FGR32RegClass); + + // When dealing with single precision only, use libcalls + if (!Subtarget->isSingleFloat()) { + if (HasMips64) + addRegisterClass(MVT::f64, &Mips::FGR64RegClass); + else + addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); + } + } + + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); + setOperationAction(ISD::MULHS, MVT::i32, Custom); + setOperationAction(ISD::MULHU, MVT::i32, Custom); + + if (HasMips64) + setOperationAction(ISD::MUL, MVT::i64, Custom); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::SDIVREM, MVT::i64, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + setOperationAction(ISD::LOAD, MVT::i32, Custom); + setOperationAction(ISD::STORE, MVT::i32, Custom); + + setTargetDAGCombine(ISD::ADDE); + setTargetDAGCombine(ISD::SUBE); + + computeRegisterProperties(); +} + +const MipsTargetLowering * +llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { + return new MipsSETargetLowering(TM); +} + + +bool +MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { + MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + + switch (SVT) { + case MVT::i64: + case MVT::i32: + if (Fast) + *Fast = true; + return true; + default: + return false; + } +} + +SDValue MipsSETargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch(Op.getOpcode()) { + case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); + case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); + case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); + case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); + case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); + case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); + case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, DAG); + } + + return MipsTargetLowering::LowerOperation(Op, DAG); +} + +// selectMADD - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc multLo, Lo0), (adde multHi, Hi0), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMADD(SDNode *ADDENode, SelectionDAG *CurDAG) { + // ADDENode's second operand must be a flag output of an ADDC node in order + // for the matching to be successful. + SDNode *ADDCNode = ADDENode->getOperand(2).getNode(); + + if (ADDCNode->getOpcode() != ISD::ADDC) + return false; + + SDValue MultHi = ADDENode->getOperand(0); + SDValue MultLo = ADDCNode->getOperand(0); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MADD only if ADDENode and ADDCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than ADDENode or ADDCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MADD instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + DebugLoc DL = ADDENode->getDebugLoc(); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, + ADDCNode->getOperand(1), + ADDENode->getOperand(1)); + + // create MipsMAdd(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; + + SDValue MAdd = CurDAG->getNode(MultOpc, DL, MVT::Untyped, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of adde and addc here + if (!SDValue(ADDCNode, 0).use_empty()) { + SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); + SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd, + LoIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDCNode, 0), LoOut); + } + if (!SDValue(ADDENode, 0).use_empty()) { + SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); + SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MAdd, + HiIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), HiOut); + } + + return true; +} + +// selectMSUB - +// Transforms a subgraph in CurDAG if the following pattern is found: +// (addc Lo0, multLo), (sube Hi0, multHi), +// where, +// multHi/Lo: product of multiplication +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. +static bool selectMSUB(SDNode *SUBENode, SelectionDAG *CurDAG) { + // SUBENode's second operand must be a flag output of an SUBC node in order + // for the matching to be successful. + SDNode *SUBCNode = SUBENode->getOperand(2).getNode(); + + if (SUBCNode->getOpcode() != ISD::SUBC) + return false; + + SDValue MultHi = SUBENode->getOperand(1); + SDValue MultLo = SUBCNode->getOperand(1); + SDNode *MultNode = MultHi.getNode(); + unsigned MultOpc = MultHi.getOpcode(); + + // MultHi and MultLo must be generated by the same node, + if (MultLo.getNode() != MultNode) + return false; + + // and it must be a multiplication. + if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) + return false; + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. + if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) + return false; + + // Transform this to a MSUB only if SUBENode and SUBCNode are the only users + // of the values of MultNode, in which case MultNode will be removed in later + // phases. + // If there exist users other than SUBENode or SUBCNode, this function returns + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MSUB instructions being + // produced. + if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) + return false; + + DebugLoc DL = SUBENode->getDebugLoc(); + + // Initialize accumulator. + SDValue ACCIn = CurDAG->getNode(MipsISD::InsertLOHI, DL, MVT::Untyped, + SUBCNode->getOperand(0), + SUBENode->getOperand(0)); + + // create MipsSub(u) node + MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub; + + SDValue MSub = CurDAG->getNode(MultOpc, DL, MVT::Glue, + MultNode->getOperand(0),// Factor 0 + MultNode->getOperand(1),// Factor 1 + ACCIn); + + // replace uses of sube and subc here + if (!SDValue(SUBCNode, 0).use_empty()) { + SDValue LoIdx = CurDAG->getConstant(Mips::sub_lo, MVT::i32); + SDValue LoOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, + LoIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBCNode, 0), LoOut); + } + if (!SDValue(SUBENode, 0).use_empty()) { + SDValue HiIdx = CurDAG->getConstant(Mips::sub_hi, MVT::i32); + SDValue HiOut = CurDAG->getNode(MipsISD::ExtractLOHI, DL, MVT::i32, MSub, + HiIdx); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(SUBENode, 0), HiOut); + } + + return true; +} + +static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + selectMADD(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + +static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + if (DCI.isBeforeLegalize()) + return SDValue(); + + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + selectMSUB(N, &DAG)) + return SDValue(N, 0); + + return SDValue(); +} + +SDValue +MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + + switch (N->getOpcode()) { + case ISD::ADDE: + return performADDECombine(N, DAG, DCI, Subtarget); + case ISD::SUBE: + return performSUBECombine(N, DAG, DCI, Subtarget); + default: + return MipsTargetLowering::PerformDAGCombine(N, DCI); + } +} + +MachineBasicBlock * +MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + switch (MI->getOpcode()) { + default: + return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB); + case Mips::BPOSGE32_PSEUDO: + return emitBPOSGE32(MI, BB); + } +} + +bool MipsSETargetLowering:: +isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const { + if (!EnableMipsTailCalls) + return false; + + // Return false if either the callee or caller has a byval argument. + if (MipsCCInfo.hasByValArg() || FI.hasByvalArg()) + return false; + + // Return true if the callee's argument area is no larger than the + // caller's. + return NextStackOffset <= FI.getIncomingArgSize(); +} + +void MipsSETargetLowering:: +getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { + // T9 should contain the address of the callee function if + // -reloction-model=pic or it is an indirect call. + if (IsPICCall || !GlobalOrExternal) { + unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; + RegsToPass.push_front(std::make_pair(T9Reg, Callee)); + } else + Ops.push_back(Callee); + + MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, + InternalLinkage, CLI, Callee, Chain); +} + +SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, + bool HasLo, bool HasHi, + SelectionDAG &DAG) const { + EVT Ty = Op.getOperand(0).getValueType(); + DebugLoc DL = Op.getDebugLoc(); + SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, + Op.getOperand(0), Op.getOperand(1)); + SDValue Lo, Hi; + + if (HasLo) + Lo = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, + DAG.getConstant(Mips::sub_lo, MVT::i32)); + if (HasHi) + Hi = DAG.getNode(MipsISD::ExtractLOHI, DL, Ty, Mult, + DAG.getConstant(Mips::sub_hi, MVT::i32)); + + if (!HasLo || !HasHi) + return HasLo ? Lo : Hi; + + SDValue Vals[] = { Lo, Hi }; + return DAG.getMergeValues(Vals, 2, DL); +} + +MachineBasicBlock * MipsSETargetLowering:: +emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ + // $bb: + // bposge32_pseudo $vr0 + // => + // $bb: + // bposge32 $tbb + // $fbb: + // li $vr2, 0 + // b $sink + // $tbb: + // li $vr1, 1 + // $sink: + // $vr0 = phi($vr2, $fbb, $vr1, $tbb) + + MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetRegisterClass *RC = &Mips::CPURegsRegClass; + DebugLoc DL = MI->getDebugLoc(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = llvm::next(MachineFunction::iterator(BB)); + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, FBB); + F->insert(It, TBB); + F->insert(It, Sink); + + // Transfer the remainder of BB and its successor edges to Sink. + Sink->splice(Sink->begin(), BB, llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + Sink->transferSuccessorsAndUpdatePHIs(BB); + + // Add successors. + BB->addSuccessor(FBB); + BB->addSuccessor(TBB); + FBB->addSuccessor(Sink); + TBB->addSuccessor(Sink); + + // Insert the real bposge32 instruction to $BB. + BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB); + + // Fill $FBB. + unsigned VR2 = RegInfo.createVirtualRegister(RC); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2) + .addReg(Mips::ZERO).addImm(0); + BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink); + + // Fill $TBB. + unsigned VR1 = RegInfo.createVirtualRegister(RC); + BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1) + .addReg(Mips::ZERO).addImm(1); + + // Insert phi function to $Sink. + BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI), + MI->getOperand(0).getReg()) + .addReg(VR2).addMBB(FBB).addReg(VR1).addMBB(TBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return Sink; +} diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h new file mode 100644 index 0000000000..186f6a343d --- /dev/null +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -0,0 +1,62 @@ +//===-- MipsSEISelLowering.h - MipsSE DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Subclass of MipsTargetLowering specialized for mips32/64. +// +//===----------------------------------------------------------------------===// + +#ifndef MipsSEISELLOWERING_H +#define MipsSEISELLOWERING_H + +#include "MipsISelLowering.h" +#include "MipsRegisterInfo.h" + +namespace llvm { + class MipsSETargetLowering : public MipsTargetLowering { + public: + explicit MipsSETargetLowering(MipsTargetMachine &TM); + + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const; + + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + + virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return Subtarget->hasDSP() ? &Mips::ACRegsDSPRegClass : + &Mips::ACRegsRegClass; + + return TargetLowering::getRepRegClassFor(VT); + } + + private: + virtual bool + isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const; + + virtual void + getOpndList(SmallVectorImpl<SDValue> &Ops, + std::deque< std::pair<unsigned, SDValue> > &RegsToPass, + bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, + CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + + SDValue lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, + SelectionDAG &DAG) const; + + MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, + MachineBasicBlock *BB) const; + }; +} + +#endif // MipsSEISELLOWERING_H diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index a9809ef712..9d08172081 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -136,6 +136,12 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (Mips::FGR64RegClass.contains(DestReg)) Opc = Mips::DMTC1; } + else if (Mips::ACRegsRegClass.contains(DestReg, SrcReg)) + Opc = Mips::COPY_AC64; + else if (Mips::ACRegsDSPRegClass.contains(DestReg, SrcReg)) + Opc = Mips::COPY_AC_DSP; + else if (Mips::ACRegs128RegClass.contains(DestReg, SrcReg)) + Opc = Mips::COPY_AC128; assert(Opc && "Cannot copy registers"); @@ -152,10 +158,10 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } void MipsSEInstrInfo:: -storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool isKill, int FI, + const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, + int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); @@ -166,6 +172,12 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::SW_P8 : Mips::SW; else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SD_P8 : Mips::SD; + else if (Mips::ACRegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_AC64_P8 : Mips::STORE_AC64; + else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_AC_DSP_P8 : Mips::STORE_AC_DSP; + else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::STORE_AC128_P8 : Mips::STORE_AC128; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::SWC1_P8 : Mips::SWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) @@ -175,15 +187,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + .addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); } void MipsSEInstrInfo:: -loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const -{ +loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); @@ -193,6 +203,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LW_P8 : Mips::LW; else if (Mips::CPU64RegsRegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LD_P8 : Mips::LD; + else if (Mips::ACRegsRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_AC64_P8 : Mips::LOAD_AC64; + else if (Mips::ACRegsDSPRegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_AC_DSP_P8 : Mips::LOAD_AC_DSP; + else if (Mips::ACRegs128RegClass.hasSubClassEq(RC)) + Opc = IsN64 ? Mips::LOAD_AC128_P8 : Mips::LOAD_AC128; else if (Mips::FGR32RegClass.hasSubClassEq(RC)) Opc = IsN64 ? Mips::LWC1_P8 : Mips::LWC1; else if (Mips::AFGR64RegClass.hasSubClassEq(RC)) @@ -201,7 +217,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0) + BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) .addMemOperand(MMO); } diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 3e22b33ed7..0bf7876f0f 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -49,17 +49,19 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + virtual void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; + + virtual void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const; virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index a39b393e4e..96967380b2 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -54,6 +54,15 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const { return true; } +const TargetRegisterClass * +MipsSERegisterInfo::intRegClass(unsigned Size) const { + if (Size == 4) + return &Mips::CPURegsRegClass; + + assert(Size == 8); + return &Mips::CPU64RegsRegClass; +} + void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index f6827e9663..2f7c37bb46 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -31,6 +31,8 @@ public: bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + virtual const TargetRegisterClass *intRegClass(unsigned Size) const; + private: virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, int FrameIndex, uint64_t StackSize, diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index fd930f0335..33363580ab 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -54,7 +54,7 @@ MipsTargetMachine(const Target &T, StringRef TT, "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32-S64")), InstrInfo(MipsInstrInfo::create(*this)), FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(*this), TSInfo(*this), JITInfo() { + TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), JITInfo() { } void MipsebTargetMachine::anchor() { } diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c4928c21eb..7e5f192264 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -34,7 +34,7 @@ class MipsTargetMachine : public LLVMTargetMachine { const DataLayout DL; // Calculates type size & alignment OwningPtr<const MipsInstrInfo> InstrInfo; OwningPtr<const MipsFrameLowering> FrameLowering; - MipsTargetLowering TLInfo; + OwningPtr<const MipsTargetLowering> TLInfo; MipsSelectionDAGInfo TSInfo; MipsJITInfo JITInfo; @@ -63,7 +63,7 @@ public: } virtual const MipsTargetLowering *getTargetLowering() const { - return &TLInfo; + return TLInfo.get(); } virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 47baef6696..7da2fed4cd 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp + NVVMReflect.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h index 454583850b..b3e8b5d262 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h @@ -52,25 +52,24 @@ enum PropertyAnnotation { }; const unsigned AnnotationNameLen = 8; // length of each annotation name -const char -PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { - "maxntidx", // PROPERTY_MAXNTID_X - "maxntidy", // PROPERTY_MAXNTID_Y - "maxntidz", // PROPERTY_MAXNTID_Z - "reqntidx", // PROPERTY_REQNTID_X - "reqntidy", // PROPERTY_REQNTID_Y - "reqntidz", // PROPERTY_REQNTID_Z - "minctasm", // PROPERTY_MINNCTAPERSM - "texture", // PROPERTY_ISTEXTURE - "surface", // PROPERTY_ISSURFACE - "sampler", // PROPERTY_ISSAMPLER - "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM - "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM - "kernel", // PROPERTY_ISKERNEL_FUNCTION - "align", // PROPERTY_ALIGN +const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { + "maxntidx", // PROPERTY_MAXNTID_X + "maxntidy", // PROPERTY_MAXNTID_Y + "maxntidz", // PROPERTY_MAXNTID_Z + "reqntidx", // PROPERTY_REQNTID_X + "reqntidy", // PROPERTY_REQNTID_Y + "reqntidz", // PROPERTY_REQNTID_Z + "minctasm", // PROPERTY_MINNCTAPERSM + "texture", // PROPERTY_ISTEXTURE + "surface", // PROPERTY_ISSURFACE + "sampler", // PROPERTY_ISSAMPLER + "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM + "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM + "kernel", // PROPERTY_ISKERNEL_FUNCTION + "align", // PROPERTY_ALIGN - // last property - "proplast", // PROPERTY_LAST + // last property + "proplast", // PROPERTY_LAST }; // name of named metadata used for global annotations @@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = { // compiling those .cpp files, hence __attribute__((unused)). __attribute__((unused)) #endif -static const char* NamedMDForAnnotations = "nvvm.annotations"; + static const char *NamedMDForAnnotations = "nvvm.annotations"; } - #endif diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp index 619181994a..459cd96cb0 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp @@ -23,10 +23,9 @@ bool CompileForDebugging; // compile for debugging static cl::opt<bool, true> Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden, - cl::location(CompileForDebugging), - cl::init(false)); + cl::location(CompileForDebugging), cl::init(false)); -void NVPTXMCAsmInfo::anchor() { } +void NVPTXMCAsmInfo::anchor() {} NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Triple TheTriple(TT); @@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) { Data32bitsDirective = " .b32 "; Data64bitsDirective = " .b64 "; PrivateGlobalPrefix = ""; - ZeroDirective = " .b8"; + ZeroDirective = " .b8"; AsciiDirective = " .b8"; AscizDirective = " .b8"; diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp index 44aa01ca6e..ccd29705df 100644 --- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp +++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp @@ -28,7 +28,6 @@ #define GET_REGINFO_MC_DESC #include "NVPTXGenRegisterInfo.inc" - using namespace llvm; static MCInstrInfo *createNVPTXMCInstrInfo() { @@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) { return X; } -static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { +static MCSubtargetInfo * +createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { MCSubtargetInfo *X = new MCSubtargetInfo(); InitNVPTXMCSubtargetInfo(X, TT, CPU, FS); return X; } -static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { +static MCCodeGenInfo *createNVPTXMCCodeGenInfo( + StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { MCCodeGenInfo *X = new MCCodeGenInfo(); X->InitMCCodeGenInfo(RM, CM, OL); return X; } - // Force static initialization. extern "C" void LLVMInitializeNVPTXTargetMC() { // Register the MC asm info. diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h index b5684883fc..d6c79b5110 100644 --- a/lib/Target/NVPTX/ManagedStringPool.h +++ b/lib/Target/NVPTX/ManagedStringPool.h @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #ifndef LLVM_SUPPORT_MANAGED_STRING_H #define LLVM_SUPPORT_MANAGED_STRING_H diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index b46ea881c4..6a53a443bf 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -41,18 +41,24 @@ enum CondCodes { inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) { switch (CC) { - case NVPTXCC::NE: return "ne"; - case NVPTXCC::EQ: return "eq"; - case NVPTXCC::LT: return "lt"; - case NVPTXCC::LE: return "le"; - case NVPTXCC::GT: return "gt"; - case NVPTXCC::GE: return "ge"; + case NVPTXCC::NE: + return "ne"; + case NVPTXCC::EQ: + return "eq"; + case NVPTXCC::LT: + return "lt"; + case NVPTXCC::LE: + return "le"; + case NVPTXCC::GT: + return "gt"; + case NVPTXCC::GE: + return "ge"; } llvm_unreachable("Unknown condition code"); } -FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM, - llvm::CodeGenOpt::Level OptLevel); +FunctionPass * +createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); @@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *); extern Target TheNVPTXTarget32; extern Target TheNVPTXTarget64; -namespace NVPTX -{ +namespace NVPTX { enum DrvInterface { NVCL, CUDA, @@ -102,7 +107,7 @@ enum LoadStore { }; namespace PTXLdStInstCode { -enum AddressSpace{ +enum AddressSpace { GENERIC = 0, GLOBAL = 1, CONSTANT = 2, diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index 7aee3595c6..d78b4e81a3 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td" //===----------------------------------------------------------------------===// // SM Versions -def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10", - "Target SM 1.0">; -def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11", - "Target SM 1.1">; -def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12", - "Target SM 1.2">; -def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13", - "Target SM 1.3">; def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20", "Target SM 2.0">; def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21", @@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", class Proc<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; -def : Proc<"sm_10", [SM10]>; -def : Proc<"sm_11", [SM11]>; -def : Proc<"sm_12", [SM12]>; -def : Proc<"sm_13", [SM13]>; def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp index 60f52a46da..0f792ec682 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp @@ -19,9 +19,9 @@ namespace llvm { bool NVPTXAllocaHoisting::runOnFunction(Function &function) { - bool functionModified = false; - Function::iterator I = function.begin(); - TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); + bool functionModified = false; + Function::iterator I = function.begin(); + TerminatorInst *firstTerminatorInst = (I++)->getTerminator(); for (Function::iterator E = function.end(); I != E; ++I) { for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) { @@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) { } char NVPTXAllocaHoisting::ID = 1; -RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting", - "Hoisting alloca instructions in non-entry " - "blocks to the entry block"); +RegisterPass<NVPTXAllocaHoisting> +X("alloca-hoisting", "Hoisting alloca instructions in non-entry " + "blocks to the entry block"); -FunctionPass *createAllocaHoisting() { - return new NVPTXAllocaHoisting(); -} +FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); } } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 0115e1f5d3..ce5d78afa3 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -47,7 +47,6 @@ #include <sstream> using namespace llvm; - #include "NVPTXGenAsmWriter.inc" bool RegAllocNilUsed = true; @@ -59,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers", cl::desc("NVPTX Specific: Emit Line numbers even without -G"), cl::init(true)); -namespace llvm { -bool InterleaveSrcInPtx = false; -} - -static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Emit source line in ptx file"), - cl::location(llvm::InterleaveSrcInPtx)); +namespace llvm { bool InterleaveSrcInPtx = false; } +static cl::opt<bool, true> +InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Emit source line in ptx file"), + cl::location(llvm::InterleaveSrcInPtx)); namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, - DenseSet<GlobalVariable*> &Globals) { +void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) Globals.insert(GV); else { @@ -88,12 +83,12 @@ void DiscoverDependentGlobals(Value *V, /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable /// instances to be emitted, but only after any dependents have been added /// first. -void VisitGlobalVariableForEmission(GlobalVariable *GV, - SmallVectorImpl<GlobalVariable*> &Order, - DenseSet<GlobalVariable*> &Visited, - DenseSet<GlobalVariable*> &Visiting) { +void VisitGlobalVariableForEmission( + GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order, + DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) { // Have we already visited this one? - if (Visited.count(GV)) return; + if (Visited.count(GV)) + return; // Do we have a circular dependency? if (Visiting.count(GV)) @@ -103,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV, Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet<GlobalVariable*> Others; + DenseSet<GlobalVariable *> Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - - for (DenseSet<GlobalVariable*>::iterator I = Others.begin(), - E = Others.end(); I != E; ++I) + + for (DenseSet<GlobalVariable *>::iterator I = Others.begin(), + E = Others.end(); + I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); // Now we can visit ourself @@ -142,25 +138,23 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { if (CE == 0) llvm_unreachable("Unknown constant value to lower!"); - switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = - ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout())) if (C != CE) return LowerConstant(C, AP); // Otherwise report the problem to the user. { - std::string S; - raw_string_ostream OS(S); - OS << "Unsupported expression in static initializer: "; - WriteAsOperand(OS, CE, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); - report_fatal_error(OS.str()); + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + WriteAsOperand(OS, CE, /*PrintType=*/ false, + !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); @@ -182,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // expression properly. This is important for differences between // blockaddress labels. Since the two labels are in the same function, it // is reasonable to treat their delta as a 32-bit value. - // FALL THROUGH. + // FALL THROUGH. case Instruction::BitCast: return LowerConstant(CE->getOperand(0), AP); @@ -192,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), - false/*ZExt*/); + false /*ZExt*/); return LowerConstant(Op, AP); } @@ -214,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { // the high bits so we are sure to get a proper truncation if the input is // a constant expr. unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); - const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); + const MCExpr *MaskExpr = + MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - // The MC library also has a right-shift operator, but it isn't consistently + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. case Instruction::Add: case Instruction::Sub: @@ -232,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP); const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP); switch (CE->getOpcode()) { - default: llvm_unreachable("Unknown binary operator constant cast expr"); - case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); - case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); - case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); - case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); - case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); - case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); - case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); - case Instruction::Or: return MCBinaryExpr::CreateOr (LHS, RHS, Ctx); - case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); + default: + llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: + return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx); + case Instruction::Sub: + return MCBinaryExpr::CreateSub(LHS, RHS, Ctx); + case Instruction::Mul: + return MCBinaryExpr::CreateMul(LHS, RHS, Ctx); + case Instruction::SDiv: + return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx); + case Instruction::SRem: + return MCBinaryExpr::CreateMod(LHS, RHS, Ctx); + case Instruction::Shl: + return MCBinaryExpr::CreateShl(LHS, RHS, Ctx); + case Instruction::And: + return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx); + case Instruction::Or: + return MCBinaryExpr::CreateOr(LHS, RHS, Ctx); + case Instruction::Xor: + return MCBinaryExpr::CreateXor(LHS, RHS, Ctx); } } } } - -void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) -{ +void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { if (!EmitLineNumbers) return; if (ignoreLoc(MI)) @@ -268,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (curLoc.isUnknown()) return; - const MachineFunction *MF = MI.getParent()->getParent(); //const TargetMachine &TM = MF->getTarget(); @@ -289,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) if (filenameMap.find(fileName.str()) == filenameMap.end()) return; - // Emit the line from the source file. if (llvm::InterleaveSrcInPtx) this->emitSrcInText(fileName.str(), curLoc.getLine()); std::stringstream temp; - temp << "\t.loc " << filenameMap[fileName.str()] - << " " << curLoc.getLine() << " " << curLoc.getCol(); + temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine() + << " " << curLoc.getCol(); OutStreamer.EmitRawText(Twine(temp.str().c_str())); } @@ -309,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(OS.str()); } -void NVPTXAsmPrinter::printReturnValStr(const Function *F, - raw_ostream &O) -{ +void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -329,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; + if (size < 32) + size = 32; } else { - assert(Ty->isFloatingPointTy() && - "Floating point type expected here"); + assert(Ty->isFloatingPointTy() && "Floating point type expected here"); size = Ty->getPrimitiveSizeInBits(); } O << ".param .b" << size << " func_retval0"; - } - else if (isa<PointerType>(Ty)) { + } else if (isa<PointerType>(Ty)) { O << ".param .b" << TLI->getPointerTy().getSizeInBits() - << " func_retval0"; + << " func_retval0"; } else { - if ((Ty->getTypeID() == Type::StructTyID) || - isa<VectorType>(Ty)) { + if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) retAlignment = TD->getABITypeAlignment(Ty); - O << ".param .align " - << retAlignment - << " .b8 func_retval0[" - << totalsz << "]"; + O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz + << "]"; } else - assert(false && - "Unknown return type"); + assert(false && "Unknown return type"); } } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, Ty, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -383,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " func_retval" << idx; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -411,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else { @@ -434,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { void NVPTXAsmPrinter::EmitFunctionBodyStart() { const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); unsigned numRegClasses = TRI.getNumRegClasses(); - VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1]; + VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1]; OutStreamer.EmitRawText(StringRef("{\n")); setAndEmitFunctionVirtualRegisters(*MF); @@ -446,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() { void NVPTXAsmPrinter::EmitFunctionBodyEnd() { OutStreamer.EmitRawText(StringRef("}\n")); - delete []VRidGlobal2LocalMap; + delete[] VRidGlobal2LocalMap; } - -void -NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const { +void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, + raw_ostream &O) const { // If the NVVM IR has some of reqntid* specified, then output // the reqntid directive, and set the unspecified ones to 1. // If none of reqntid* is specified, don't output reqntid directive. unsigned reqntidx, reqntidy, reqntidz; bool specified = false; - if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1; - else specified = true; - if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1; - else specified = true; - if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1; - else specified = true; + if (llvm::getReqNTIDx(F, reqntidx) == false) + reqntidx = 1; + else + specified = true; + if (llvm::getReqNTIDy(F, reqntidy) == false) + reqntidy = 1; + else + specified = true; + if (llvm::getReqNTIDz(F, reqntidz) == false) + reqntidz = 1; + else + specified = true; if (specified) - O << ".reqntid " << reqntidx << ", " - << reqntidy << ", " << reqntidz << "\n"; + O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz + << "\n"; // If the NVVM IR has some of maxntid* specified, then output // the maxntid directive, and set the unspecified ones to 1. // If none of maxntid* is specified, don't output maxntid directive. unsigned maxntidx, maxntidy, maxntidz; specified = false; - if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1; - else specified = true; - if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1; - else specified = true; - if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1; - else specified = true; + if (llvm::getMaxNTIDx(F, maxntidx) == false) + maxntidx = 1; + else + specified = true; + if (llvm::getMaxNTIDy(F, maxntidy) == false) + maxntidy = 1; + else + specified = true; + if (llvm::getMaxNTIDz(F, maxntidz) == false) + maxntidz = 1; + else + specified = true; if (specified) - O << ".maxntid " << maxntidx << ", " - << maxntidy << ", " << maxntidz << "\n"; + O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz + << "\n"; unsigned mincta; if (llvm::getMinCTASm(F, mincta)) O << ".minnctapersm " << mincta << "\n"; } -void -NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, - raw_ostream &O) { - const TargetRegisterClass * RC = MRI->getRegClass(vr); +void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, + raw_ostream &O) { + const TargetRegisterClass *RC = MRI->getRegClass(vr); unsigned id = RC->getID(); std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id]; @@ -506,44 +512,38 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec, report_fatal_error("Bad register!"); } -void -NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, - raw_ostream &O) { +void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec, + raw_ostream &O) { getVirtualRegisterName(vr, isVec, O); } -void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, - raw_ostream &O) { - static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'}; - int Imm = (int)MO.getImm(); - if(0 == strcmp(Modifier, "vecelem")) +void NVPTXAsmPrinter::printVecModifiedImmediate( + const MachineOperand &MO, const char *Modifier, raw_ostream &O) { + static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' }; + int Imm = (int) MO.getImm(); + if (0 == strcmp(Modifier, "vecelem")) O << "_" << vecelem[Imm]; - else if(0 == strcmp(Modifier, "vecv4comm1")) { - if((Imm < 0) || (Imm > 3)) + else if (0 == strcmp(Modifier, "vecv4comm1")) { + if ((Imm < 0) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4comm2")) { - if((Imm < 4) || (Imm > 7)) + } else if (0 == strcmp(Modifier, "vecv4comm2")) { + if ((Imm < 4) || (Imm > 7)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv4pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%4]; - } - else if(0 == strcmp(Modifier, "vecv2comm1")) { - if((Imm < 0) || (Imm > 1)) + } else if (0 == strcmp(Modifier, "vecv4pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 4]; + } else if (0 == strcmp(Modifier, "vecv2comm1")) { + if ((Imm < 0) || (Imm > 1)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2comm2")) { - if((Imm < 2) || (Imm > 3)) + } else if (0 == strcmp(Modifier, "vecv2comm2")) { + if ((Imm < 2) || (Imm > 3)) O << "//"; - } - else if(0 == strcmp(Modifier, "vecv2pos")) { - if(Imm < 0) Imm = 0; - O << "_" << vecelem[Imm%2]; - } - else + } else if (0 == strcmp(Modifier, "vecv2pos")) { + if (Imm < 0) + Imm = 0; + O << "_" << vecelem[Imm % 2]; + } else llvm_unreachable("Unknown Modifier on immediate operand"); } @@ -565,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, emitVirtualRegister(MO.getReg(), true, O); else llvm_unreachable( - "Don't know how to handle the modifier on virtual register."); + "Don't know how to handle the modifier on virtual register."); } } return; @@ -576,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, else if (strstr(Modifier, "vec") == Modifier) printVecModifiedImmediate(MO, Modifier, O); else - llvm_unreachable("Don't know how to handle modifier on immediate operand"); + llvm_unreachable( + "Don't know how to handle modifier on immediate operand"); return; case MachineOperand::MO_FPImmediate: @@ -588,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, break; case MachineOperand::MO_ExternalSymbol: { - const char * symbname = MO.getSymbolName(); + const char *symbname = MO.getSymbolName(); if (strstr(symbname, ".PARAM") == symbname) { unsigned index; - sscanf(symbname+6, "%u[];", &index); + sscanf(symbname + 6, "%u[];", &index); printParamName(index, O); - } - else if (strstr(symbname, ".HLPPARAM") == symbname) { + } else if (strstr(symbname, ".HLPPARAM") == symbname) { unsigned index; - sscanf(symbname+9, "%u[];", &index); + sscanf(symbname + 9, "%u[];", &index); O << *CurrentFnSym << "_param_" << index << "_offset"; - } - else + } else O << symbname; break; } @@ -613,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } } -void NVPTXAsmPrinter:: -printImplicitDef(const MachineInstr *MI, raw_ostream &O) const { +void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI, + raw_ostream &O) const { #ifndef __OPTIMIZE__ O << "\t// Implicit def :"; //printOperand(MI, 0); @@ -628,32 +627,41 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, if (Modifier && !strcmp(Modifier, "add")) { O << ", "; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } else { - if (MI->getOperand(opNum+1).isImm() && - MI->getOperand(opNum+1).getImm() == 0) + if (MI->getOperand(opNum + 1).isImm() && + MI->getOperand(opNum + 1).getImm() == 0) return; // don't print ',0' or '+0' O << "+"; - printOperand(MI, opNum+1, O); + printOperand(MI, opNum + 1, O); } } void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, - raw_ostream &O, const char *Modifier) -{ + raw_ostream &O, const char *Modifier) { if (Modifier) { const MachineOperand &MO = MI->getOperand(opNum); - int Imm = (int)MO.getImm(); + int Imm = (int) MO.getImm(); if (!strcmp(Modifier, "volatile")) { if (Imm) O << ".volatile"; } else if (!strcmp(Modifier, "addsp")) { switch (Imm) { - case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break; - case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break; - case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break; - case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break; - case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break; + case NVPTX::PTXLdStInstCode::GLOBAL: + O << ".global"; + break; + case NVPTX::PTXLdStInstCode::SHARED: + O << ".shared"; + break; + case NVPTX::PTXLdStInstCode::LOCAL: + O << ".local"; + break; + case NVPTX::PTXLdStInstCode::PARAM: + O << ".param"; + break; + case NVPTX::PTXLdStInstCode::CONSTANT: + O << ".const"; + break; case NVPTX::PTXLdStInstCode::GENERIC: if (!nvptxSubtarget.hasGenericLdSt()) O << ".global"; @@ -661,31 +669,27 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum, default: llvm_unreachable("Wrong Address Space"); } - } - else if (!strcmp(Modifier, "sign")) { - if (Imm==NVPTX::PTXLdStInstCode::Signed) + } else if (!strcmp(Modifier, "sign")) { + if (Imm == NVPTX::PTXLdStInstCode::Signed) O << "s"; - else if (Imm==NVPTX::PTXLdStInstCode::Unsigned) + else if (Imm == NVPTX::PTXLdStInstCode::Unsigned) O << "u"; else O << "f"; - } - else if (!strcmp(Modifier, "vec")) { - if (Imm==NVPTX::PTXLdStInstCode::V2) + } else if (!strcmp(Modifier, "vec")) { + if (Imm == NVPTX::PTXLdStInstCode::V2) O << ".v2"; - else if (Imm==NVPTX::PTXLdStInstCode::V4) + else if (Imm == NVPTX::PTXLdStInstCode::V4) O << ".v4"; - } - else + } else llvm_unreachable("Unknown Modifier"); - } - else + } else llvm_unreachable("Empty Modifier"); } -void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { - emitLinkageDirective(F,O); + emitLinkageDirective(F, O); if (llvm::isKernelFunction(*F)) O << ".entry "; else @@ -696,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) { O << ";\n"; } -static bool usedInGlobalVarDef(const Constant *C) -{ +static bool usedInGlobalVarDef(const Constant *C) { if (!C) return false; @@ -707,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C) return true; } - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { const Constant *C = dyn_cast<Constant>(*ui); if (usedInGlobalVarDef(C)) return true; @@ -716,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C) return false; } -static bool usedInOneFunc(const User *U, Function const *&oneFunc) -{ +static bool usedInOneFunc(const User *U, Function const *&oneFunc) { if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { if (othergv->getName().str() == "llvm.used") return true; @@ -730,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) return false; oneFunc = curFunc; return true; - } - else + } else return false; } if (const MDNode *md = dyn_cast<MDNode>(U)) if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") || - (md->getName().str() == "llvm.dbg.sp"))) + (md->getName().str() == "llvm.dbg.sp"))) return true; - - for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end(); - ui!=ue; ++ui) { + for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end(); + ui != ue; ++ui) { if (usedInOneFunc(*ui, oneFunc) == false) return false; } @@ -776,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { static bool useFuncSeen(const Constant *C, llvm::DenseMap<const Function *, bool> &seenMap) { - for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end(); - ui!=ue; ++ui) { + for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end(); + ui != ue; ++ui) { if (const Constant *cu = dyn_cast<Constant>(*ui)) { if (useFuncSeen(cu, seenMap)) return true; } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) { const BasicBlock *bb = I->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; if (seenMap.find(caller) != seenMap.end()) return true; } @@ -793,10 +795,9 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { llvm::DenseMap<const Function *, bool> seenMap; - for (Module::const_iterator FI=M.begin(), FE=M.end(); - FI!=FE; ++FI) { + for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; if (F->isDeclaration()) { @@ -808,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { emitDeclaration(F, O); continue; } - for (Value::const_use_iterator iter=F->use_begin(), - iterEnd=F->use_end(); iter!=iterEnd; ++iter) { + for (Value::const_use_iterator iter = F->use_begin(), + iterEnd = F->use_end(); + iter != iterEnd; ++iter) { if (const Constant *C = dyn_cast<Constant>(*iter)) { if (usedInGlobalVarDef(C)) { // The use is in the initialization of a global variable @@ -828,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) { } } - if (!isa<Instruction>(*iter)) continue; + if (!isa<Instruction>(*iter)) + continue; const Instruction *instr = cast<Instruction>(*iter); const BasicBlock *bb = instr->getParent(); - if (!bb) continue; + if (!bb) + continue; const Function *caller = bb->getParent(); - if (!caller) continue; + if (!caller) + continue; // If a caller has already been seen, then the caller is // appearing in the module before the callee. so print out @@ -852,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { DebugInfoFinder DbgFinder; DbgFinder.processModule(M); - unsigned i=1; + unsigned i = 1; for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { + E = DbgFinder.compile_unit_end(); + I != E; ++I) { DICompileUnit DIUnit(*I); StringRef Filename(DIUnit.getFilename()); StringRef Dirname(DIUnit.getDirectory()); @@ -871,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) { + E = DbgFinder.subprogram_end(); + I != E; ++I) { DISubprogram SP(*I); StringRef Filename(SP.getFilename()); StringRef Dirname(SP.getDirectory()); @@ -887,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } } -bool NVPTXAsmPrinter::doInitialization (Module &M) { +bool NVPTXAsmPrinter::doInitialization(Module &M) { SmallString<128> Str1; raw_svector_ostream OS1(Str1); @@ -899,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { //bool Result = AsmPrinter::doInitialization(M); // Initialize TargetLoweringObjectFile. - const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) - .Initialize(OutContext, TM); + const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) + .Initialize(OutContext, TM); Mang = new Mangler(OutContext, *TM.getDataLayout()); @@ -908,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { emitHeader(M, OS1); OutStreamer.EmitRawText(OS1.str()); - // Already commented out //bool Result = AsmPrinter::doInitialization(M); - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); @@ -926,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. - SmallVector<GlobalVariable*,8> Globals; - DenseSet<GlobalVariable*> GVVisited; - DenseSet<GlobalVariable*> GVVisiting; + SmallVector<GlobalVariable *, 8> Globals; + DenseSet<GlobalVariable *> GVVisited; + DenseSet<GlobalVariable *> GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; + ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); - assert(GVVisited.size() == M.getGlobalList().size() && + assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); @@ -946,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success + return false; // success } -void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { O << "//\n"; O << "// Generated by LLVM NVPTX Back-End\n"; O << "//\n"; @@ -989,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { Module::GlobalListType &global_list = M.getGlobalList(); int i, n = global_list.size(); - GlobalVariable **gv_array = new GlobalVariable* [n]; + GlobalVariable **gv_array = new GlobalVariable *[n]; // first, back-up GlobalVariable in gv_array i = 0; for (Module::global_iterator I = global_list.begin(), E = global_list.end(); - I != E; ++I) + I != E; ++I) gv_array[i++] = &*I; // second, empty global_list @@ -1005,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { bool ret = AsmPrinter::doFinalization(M); // now we restore global variables - for (i = 0; i < n; i ++) + for (i = 0; i < n; i++) global_list.insert(global_list.end(), gv_array[i]); delete[] gv_array; return ret; - //bool Result = AsmPrinter::doFinalization(M); // Instead of calling the parents doFinalization, we may // clone parents doFinalization and customize here. @@ -1031,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { // external without init -> .extern // appending -> not allowed, assert. -void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) -{ +void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, + raw_ostream &O) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { if (V->hasExternalLinkage()) { if (isa<GlobalVariable>(V)) { @@ -1059,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O) } } - -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1111,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, if (Initializer) CI = dyn_cast<ConstantInt>(Initializer); if (CI) { - unsigned sample=CI->getZExtValue(); + unsigned sample = CI->getZExtValue(); O << " = { "; - for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >> - __CLK_ADDRESS_BASE) ; i < 3 ; i++) { + for (int i = 0, + addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); + i < 3; i++) { O << "addr_mode_" << i << " = "; switch (addr) { - case 0: O << "wrap"; break; - case 1: O << "clamp_to_border"; break; - case 2: O << "clamp_to_edge"; break; - case 3: O << "wrap"; break; - case 4: O << "mirror"; break; + case 0: + O << "wrap"; + break; + case 1: + O << "clamp_to_border"; + break; + case 2: + O << "clamp_to_edge"; + break; + case 3: + O << "wrap"; + break; + case 4: + O << "mirror"; + break; } - O <<", "; + O << ", "; } O << "filter_mode = "; - switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) { - case 0: O << "nearest"; break; - case 1: O << "linear"; break; - case 2: assert ( 0 && "Anisotropic filtering is not supported"); - default: O << "nearest"; break; + switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { + case 0: + O << "nearest"; + break; + case 1: + O << "linear"; + break; + case 2: + assert(0 && "Anisotropic filtering is not supported"); + default: + O << "nearest"; + break; } - if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) { + if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { O << ", force_unnormalized_coords = 1"; } O << " }"; @@ -1176,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, else O << " .align " << GVar->getAlignment(); - if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) { O << " ."; O << getPTXFundamentalTypeStr(ETy, false); @@ -1186,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and global state // spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { - O << " = " ; + O << " = "; printScalarConstant(Initializer, O); } } } else { - unsigned int ElementSize =0; + unsigned int ElementSize = 0; // Although PTX has direct support for struct type and array type and // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for @@ -1210,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, // Ptx allows variable initilization only for constant and // global state spaces. if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) - && GVar->hasInitializer()) { + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && + GVar->hasInitializer()) { Constant *Initializer = GVar->getInitializer(); - if (!isa<UndefValue>(Initializer) && - !Initializer->isNullValue()) { + if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { if (nvptxSubtarget.is64Bit()) { - O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/8; - } - else { - O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ; - O << ElementSize/4; + O << " .u64 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 8; + } else { + O << " .u32 " << *Mang->getSymbol(GVar) << "["; + O << ElementSize / 4; } O << "]"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar) << "["; O << ElementSize; O << "]"; } - O << " = {" ; + O << " = {"; aggBuffer.print(); O << "}"; - } - else { - O << " .b8 " << *Mang->getSymbol(GVar) ; + } else { + O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } - } - else { + } else { O << " .b8 " << *Mang->getSymbol(GVar); if (ElementSize) { - O <<"[" ; + O << "["; O << ElementSize; O << "]"; } } break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } } @@ -1270,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { std::vector<GlobalVariable *> &gvars = localDecls[f]; - for (unsigned i=0, e=gvars.size(); i!=e; ++i) { + for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; printModuleLevelGV(gvars[i], O, true); } @@ -1280,24 +1295,24 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const { switch (AddressSpace) { case llvm::ADDRESS_SPACE_LOCAL: - O << "local" ; + O << "local"; break; case llvm::ADDRESS_SPACE_GLOBAL: - O << "global" ; + O << "global"; break; case llvm::ADDRESS_SPACE_CONST: // This logic should be consistent with that in // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp) if (nvptxSubtarget.hasGenericLdSt()) - O << "global" ; + O << "global"; else - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: - O << "const" ; + O << "const"; break; case llvm::ADDRESS_SPACE_SHARED: - O << "shared" ; + O << "shared"; break; default: report_fatal_error("Bad address space found while emitting PTX"); @@ -1305,8 +1320,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, } } -std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, - bool useB4PTR) const { +std::string +NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1330,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, return "f64"; case Type::PointerTyID: if (nvptxSubtarget.is64Bit()) - if (useB4PTR) return "b64"; - else return "u64"; + if (useB4PTR) + return "b64"; + else + return "u64"; + else if (useB4PTR) + return "b32"; else - if (useB4PTR) return "b32"; - else return "u32"; + return "u32"; } llvm_unreachable("unexpected type"); return NULL; } -void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, +void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); @@ -1364,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, return; } - int64_t ElementSize =0; + int64_t ElementSize = 0; // Although PTX has direct support for struct type and array type and LLVM IR // is very similar to PTX, the LLVM CodeGen does not support for targets that @@ -1375,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar, case Type::ArrayTyID: case Type::VectorTyID: ElementSize = TD->getTypeStoreSize(ETy); - O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ; + O << " .b8 " << *Mang->getSymbol(GVar) << "["; if (ElementSize) { - O << itostr(ElementSize) ; + O << itostr(ElementSize); } O << "]"; break; default: - assert( 0 && "type not supported yet"); + assert(0 && "type not supported yet"); } - return ; + return; } - -static unsigned int -getOpenCLAlignment(const DataLayout *TD, - Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty)) return TD->getPrefTypeAlignment(Ty); @@ -1404,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int numE = VTy->getNumElements(); unsigned int alignE = TD->getPrefTypeAlignment(ETy); if (numE == 3) - return 4*alignE; + return 4 * alignE; else - return numE*alignE; + return numE * alignE; } const StructType *STy = dyn_cast<StructType>(Ty); @@ -1414,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD, unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. - for (unsigned i=0, e=STy->getNumElements(); i != e; i++) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); unsigned int align = getOpenCLAlignment(TD, ETy); if (align > alignStruct) @@ -1458,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { } for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) { - if (i==paramIndex) { + if (i == paramIndex) { printParamName(I, paramIndex, O); return; } @@ -1466,8 +1481,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { llvm_unreachable("paramIndex out of bound"); } -void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, - raw_ostream &O) { +void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = TM.getTargetLowering(); @@ -1481,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "(\n"; for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { - const Type *Ty = I->getType(); + Type *Ty = I->getType(); if (!first) O << ",\n"; @@ -1496,14 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex; else // Default image is read_only O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex; - } - else // Should be llvm::isSampler(*I) + } else // Should be llvm::isSampler(*I) O << "\t.param .samplerref " << *CurrentFnSym << "_param_" - << paramIndex; + << paramIndex; continue; } - if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) { + if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { + if (Ty->isVectorTy()) { + // Just print .param .b8 .align <a> .param[size]; + // <a> = PAL.getparamalignment + // size = typeallocsize of element type + unsigned align = PAL.getParamAlignment(paramIndex + 1); + if (align == 0) + align = TD->getABITypeAlignment(Ty); + + unsigned sz = TD->getTypeAllocSize(Ty); + O << "\t.param .align " << align << " .b8 "; + printParamName(I, paramIndex, O); + O << "[" << sz << "]"; + + continue; + } // Just a scalar const PointerType *PTy = dyn_cast<PointerType>(Ty); if (isKernelFunc) { @@ -1514,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { Type *ETy = PTy->getElementType(); int addrSpace = PTy->getAddressSpace(); - switch(addrSpace) { + switch (addrSpace) { default: O << ".ptr "; break; @@ -1529,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, O << ".ptr .global "; break; } - O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; } printParamName(I, paramIndex, O); continue; } // non-pointer scalar to kernel func - O << "\t.param ." - << getPTXFundamentalTypeStr(Ty) << " "; + O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " "; printParamName(I, paramIndex, O); continue; } @@ -1546,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -1562,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // param has byVal attribute. So should be a pointer const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI || isKernelFunc) { // Just print .param .b8 .align <a> .param[size]; // <a> = PAL.getparamalignment // size = typeallocsize of element type - unsigned align = PAL.getParamAlignment(paramIndex+1); + unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) align = TD->getABITypeAlignment(ETy); unsigned sz = TD->getTypeAllocSize(ETy); - O << "\t.param .align " << align - << " .b8 "; + O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; continue; @@ -1587,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, // each vector element. SmallVector<EVT, 16> vtparts; ComputeValueVTs(*TLI, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -1595,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << "\t.reg .b" << sz << " "; printParamName(I, paramIndex, O); - if (j<je-1) O << ",\n"; + if (j < je - 1) + O << ",\n"; ++paramIndex; } - if (i<e-1) + if (i < e - 1) O << ",\n"; } --paramIndex; @@ -1620,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, emitFunctionParamList(F, O); } - -void NVPTXAsmPrinter:: -setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { +void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( + const MachineFunction &MF) { SmallString<128> Str; raw_svector_ostream O(Str); @@ -1635,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); int NumBytes = (int) MFI->getStackSize(); if (NumBytes) { - O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" - << DEPOTNAME - << getFunctionNumber() << "[" << NumBytes << "];\n"; + O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME + << getFunctionNumber() << "[" << NumBytes << "];\n"; if (nvptxSubtarget.is64Bit()) { O << "\t.reg .b64 \t%SP;\n"; O << "\t.reg .b64 \t%SPL;\n"; - } - else { + } else { O << "\t.reg .b32 \t%SP;\n"; O << "\t.reg .b32 \t%SPL;\n"; } @@ -1653,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { // register number and the per class virtual register number. // We use the per class virtual register number in the ptx output. unsigned int numVRs = MRI->getNumVirtRegs(); - for (unsigned i=0; i< numVRs; i++) { + for (unsigned i = 0; i < numVRs; i++) { unsigned int vr = TRI->index2VirtReg(i); const TargetRegisterClass *RC = MRI->getRegClass(vr); std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()]; int n = regmap.size(); - regmap.insert(std::make_pair(vr, n+1)); + regmap.insert(std::make_pair(vr, n + 1)); } // Emit register declarations @@ -1702,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) { OutStreamer.EmitRawText(O.str()); } - void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { - APFloat APF = APFloat(Fp->getValueAPF()); // make a copy + APFloat APF = APFloat(Fp->getValueAPF()); // make a copy bool ignored; unsigned int numHex; const char *lead; - if (Fp->getType()->getTypeID()==Type::FloatTyID) { + if (Fp->getType()->getTypeID() == Type::FloatTyID) { numHex = 8; lead = "0f"; - APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored); } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { numHex = 16; lead = "0d"; - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); } else llvm_unreachable("unsupported fp type"); @@ -1760,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } - void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) { @@ -1768,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, if (isa<UndefValue>(CPV) || CPV->isNullValue()) { int s = TD->getTypeAllocSize(CPV->getType()); - if (s<Bytes) + if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); return; @@ -1779,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::IntegerTyID: { const Type *ETy = CPV->getType(); - if ( ETy == Type::getInt8Ty(CPV->getContext()) ){ + if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); ptr = &c; aggBuffer->addBytes(ptr, 1, Bytes); - } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) { - short int16 = - (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); - ptr = (unsigned char*)&int16; + } else if (ETy == Type::getInt16Ty(CPV->getContext())) { + short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue(); + ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); - } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt32Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { - if (ConstantInt *constInt = - dyn_cast<ConstantInt>(ConstantFoldConstantExpression( - Cexpr, TD))) { - int int32 =(int)(constInt->getZExtValue()); - ptr = (unsigned char*)&int32; + if (ConstantInt *constInt = dyn_cast<ConstantInt>( + ConstantFoldConstantExpression(Cexpr, TD))) { + int int32 = (int)(constInt->getZExtValue()); + ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; } @@ -1812,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } } llvm_unreachable("unsupported integer const type"); - } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) { + } else if (ETy == Type::getInt64Ty(CPV->getContext())) { if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { if (ConstantInt *constInt = dyn_cast<ConstantInt>( - ConstantFoldConstantExpression(Cexpr, TD))) { - long long int64 =(long long)(constInt->getZExtValue()); - ptr = (unsigned char*)&int64; + ConstantFoldConstantExpression(Cexpr, TD))) { + long long int64 = (long long)(constInt->getZExtValue()); + ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; } @@ -1841,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::FloatTyID: case Type::DoubleTyID: { ConstantFP *CFP = dyn_cast<ConstantFP>(CPV); - const Type* Ty = CFP->getType(); + const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { - float float32 = (float)CFP->getValueAPF().convertToFloat(); - ptr = (unsigned char*)&float32; + float float32 = (float) CFP->getValueAPF().convertToFloat(); + ptr = (unsigned char *)&float32; aggBuffer->addBytes(ptr, 4, Bytes); } else if (Ty == Type::getDoubleTy(CPV->getContext())) { double float64 = CFP->getValueAPF().convertToDouble(); - ptr = (unsigned char*)&float64; + ptr = (unsigned char *)&float64; aggBuffer->addBytes(ptr, 8, Bytes); - } - else { + } else { llvm_unreachable("unsupported fp const type"); } break; @@ -1859,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, case Type::PointerTyID: { if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { aggBuffer->addSymbol(GVar); - } - else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } @@ -1876,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, isa<ConstantStruct>(CPV)) { int ElementSize = TD->getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); - if ( Bytes > ElementSize ) - aggBuffer->addZeros(Bytes-ElementSize); - } - else if (isa<ConstantAggregateZero>(CPV)) + if (Bytes > ElementSize) + aggBuffer->addZeros(Bytes - ElementSize); + } else if (isa<ConstantAggregateZero>(CPV)) aggBuffer->addZeros(Bytes); else llvm_unreachable("Unexpected Constant type"); @@ -1905,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, } if (const ConstantDataSequential *CDS = - dyn_cast<ConstantDataSequential>(CPV)) { + dyn_cast<ConstantDataSequential>(CPV)) { if (CDS->getNumElements()) for (unsigned i = 0; i < CDS->getNumElements(); ++i) bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, @@ -1913,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, return; } - if (isa<ConstantStruct>(CPV)) { if (CPV->getNumOperands()) { StructType *ST = cast<StructType>(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { - if ( i == (e - 1)) + if (i == (e - 1)) Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - TD->getStructLayout(ST)->getElementOffset(i); + TD->getTypeAllocSize(ST) - + TD->getStructLayout(ST)->getElementOffset(i); else - Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) - - TD->getStructLayout(ST)->getElementOffset(i); - bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, - aggBuffer); + Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - + TD->getStructLayout(ST)->getElementOffset(i); + bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); } } return; @@ -1937,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // - bool NVPTXAsmPrinter::isImageType(const Type *Ty) { std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty); - if (PI != TypeNameMap.end() && - (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) + if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || + !PI->second.compare("struct._image2d_t") || + !PI->second.compare("struct._image3d_t"))) return true; return false; @@ -1955,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) { /// bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. + if (ExtraCode[1] != 0) + return true; // Unknown modifier. switch (ExtraCode[0]) { default: @@ -1974,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } -bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { +bool NVPTXAsmPrinter::PrintAsmMemoryOperand( + const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier + return true; // Unknown modifier O << '['; printMemOperand(MI, OpNo, O); @@ -1989,41 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) -{ - switch(MI.getOpcode()) { +bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: return false; - case NVPTX::CallArgBeginInst: case NVPTX::CallArgEndInst0: - case NVPTX::CallArgEndInst1: case NVPTX::CallArgF32: - case NVPTX::CallArgF64: case NVPTX::CallArgI16: - case NVPTX::CallArgI32: case NVPTX::CallArgI32imm: - case NVPTX::CallArgI64: case NVPTX::CallArgI8: - case NVPTX::CallArgParam: case NVPTX::CallVoidInst: - case NVPTX::CallVoidInstReg: case NVPTX::Callseq_End: + case NVPTX::CallArgBeginInst: + case NVPTX::CallArgEndInst0: + case NVPTX::CallArgEndInst1: + case NVPTX::CallArgF32: + case NVPTX::CallArgF64: + case NVPTX::CallArgI16: + case NVPTX::CallArgI32: + case NVPTX::CallArgI32imm: + case NVPTX::CallArgI64: + case NVPTX::CallArgI8: + case NVPTX::CallArgParam: + case NVPTX::CallVoidInst: + case NVPTX::CallVoidInstReg: + case NVPTX::Callseq_End: case NVPTX::CallVoidInstReg64: - case NVPTX::DeclareParamInst: case NVPTX::DeclareRetMemInst: - case NVPTX::DeclareRetRegInst: case NVPTX::DeclareRetScalarInst: - case NVPTX::DeclareScalarParamInst: case NVPTX::DeclareScalarRegInst: - case NVPTX::StoreParamF32: case NVPTX::StoreParamF64: - case NVPTX::StoreParamI16: case NVPTX::StoreParamI32: - case NVPTX::StoreParamI64: case NVPTX::StoreParamI8: - case NVPTX::StoreParamS32I8: case NVPTX::StoreParamU32I8: - case NVPTX::StoreParamS32I16: case NVPTX::StoreParamU32I16: - case NVPTX::StoreRetvalF32: case NVPTX::StoreRetvalF64: - case NVPTX::StoreRetvalI16: case NVPTX::StoreRetvalI32: - case NVPTX::StoreRetvalI64: case NVPTX::StoreRetvalI8: - case NVPTX::LastCallArgF32: case NVPTX::LastCallArgF64: - case NVPTX::LastCallArgI16: case NVPTX::LastCallArgI32: - case NVPTX::LastCallArgI32imm: case NVPTX::LastCallArgI64: - case NVPTX::LastCallArgI8: case NVPTX::LastCallArgParam: - case NVPTX::LoadParamMemF32: case NVPTX::LoadParamMemF64: - case NVPTX::LoadParamMemI16: case NVPTX::LoadParamMemI32: - case NVPTX::LoadParamMemI64: case NVPTX::LoadParamMemI8: - case NVPTX::LoadParamRegF32: case NVPTX::LoadParamRegF64: - case NVPTX::LoadParamRegI16: case NVPTX::LoadParamRegI32: - case NVPTX::LoadParamRegI64: case NVPTX::LoadParamRegI8: - case NVPTX::PrototypeInst: case NVPTX::DBG_VALUE: + case NVPTX::DeclareParamInst: + case NVPTX::DeclareRetMemInst: + case NVPTX::DeclareRetRegInst: + case NVPTX::DeclareRetScalarInst: + case NVPTX::DeclareScalarParamInst: + case NVPTX::DeclareScalarRegInst: + case NVPTX::StoreParamF32: + case NVPTX::StoreParamF64: + case NVPTX::StoreParamI16: + case NVPTX::StoreParamI32: + case NVPTX::StoreParamI64: + case NVPTX::StoreParamI8: + case NVPTX::StoreParamS32I8: + case NVPTX::StoreParamU32I8: + case NVPTX::StoreParamS32I16: + case NVPTX::StoreParamU32I16: + case NVPTX::StoreRetvalF32: + case NVPTX::StoreRetvalF64: + case NVPTX::StoreRetvalI16: + case NVPTX::StoreRetvalI32: + case NVPTX::StoreRetvalI64: + case NVPTX::StoreRetvalI8: + case NVPTX::LastCallArgF32: + case NVPTX::LastCallArgF64: + case NVPTX::LastCallArgI16: + case NVPTX::LastCallArgI32: + case NVPTX::LastCallArgI32imm: + case NVPTX::LastCallArgI64: + case NVPTX::LastCallArgI8: + case NVPTX::LastCallArgParam: + case NVPTX::LoadParamMemF32: + case NVPTX::LoadParamMemF64: + case NVPTX::LoadParamMemI16: + case NVPTX::LoadParamMemI32: + case NVPTX::LoadParamMemI64: + case NVPTX::LoadParamMemI8: + case NVPTX::LoadParamRegF32: + case NVPTX::LoadParamRegF64: + case NVPTX::LoadParamRegI16: + case NVPTX::LoadParamRegI32: + case NVPTX::LoadParamRegI64: + case NVPTX::LoadParamRegI8: + case NVPTX::PrototypeInst: + case NVPTX::DBG_VALUE: return true; } return false; @@ -2035,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() { RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64); } - void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { std::stringstream temp; - LineReader * reader = this->getReader(filename.str()); + LineReader *reader = this->getReader(filename.str()); temp << "\n//"; temp << filename.str(); temp << ":"; @@ -2049,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { this->OutStreamer.EmitRawText(Twine(temp.str())); } - LineReader *NVPTXAsmPrinter::getReader(std::string filename) { - if (reader == NULL) { - reader = new LineReader(filename); + if (reader == NULL) { + reader = new LineReader(filename); } if (reader->fileName() != filename) { delete reader; - reader = new LineReader(filename); + reader = new LineReader(filename); } return reader; } - -std::string -LineReader::readLine(unsigned lineNum) { +std::string LineReader::readLine(unsigned lineNum) { if (lineNum < theCurLine) { theCurLine = 0; - fstr.seekg(0,std::ios::beg); + fstr.seekg(0, std::ios::beg); } while (theCurLine < lineNum) { - fstr.getline(buff,500); + fstr.getline(buff, 500); theCurLine++; } return buff; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 42498f0bf7..6dc9fc0ffe 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -43,15 +43,15 @@ // This is defined in AsmPrinter.cpp. // Used to process the constant expressions in initializers. namespace nvptx { -const llvm::MCExpr *LowerConstant(const llvm::Constant *CV, - llvm::AsmPrinter &AP) ; +const llvm::MCExpr * +LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP); } namespace llvm { class LineReader { private: - unsigned theCurLine ; + unsigned theCurLine; std::ifstream fstr; char buff[512]; std::string theFileName; @@ -63,17 +63,12 @@ public: theFileName = filename; } std::string fileName() { return theFileName; } - ~LineReader() { - fstr.close(); - } + ~LineReader() { fstr.close(); } std::string readLine(unsigned line); }; - - class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { - class AggBuffer { // Used to buffer the emitted string for initializing global // aggregates. @@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { // Once we have this AggBuffer setup, we can choose how to print // it out. public: - unsigned size; // size of the buffer in bytes + unsigned size; // size of the buffer in bytes unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector<unsigned, 4> symbolPosInBuffer; @@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) - :O(_O),AP(_AP) { + : O(_O), AP(_AP) { buffer = new unsigned char[_size]; size = _size; curpos = 0; numSymbols = 0; } - ~AggBuffer() { - delete [] buffer; - } + ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { - assert((curpos+Num) <= size); - assert((curpos+Bytes) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + assert((curpos + Bytes) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = Ptr[i]; - curpos ++; + curpos++; } - for ( int i=Num; i < Bytes ; ++i) { + for (int i = Num; i < Bytes; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } unsigned addZeros(int Num) { - assert((curpos+Num) <= size); - for ( int i= 0; i < Num; ++i) { + assert((curpos + Num) <= size); + for (int i = 0; i < Num; ++i) { buffer[curpos] = 0; - curpos ++; + curpos++; } return curpos; } @@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { void print() { if (numSymbols == 0) { // print out in bytes - for (unsigned i=0; i<size; i++) { + for (unsigned i = 0; i < size; i++) { if (i) O << ", "; - O << (unsigned int)buffer[i]; + O << (unsigned int) buffer[i]; } } else { // print out in 4-bytes or 8-bytes @@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned int nBytes = 4; if (AP.nvptxSubtarget.is64Bit()) nBytes = 8; - for (pos=0; pos<size; pos+=nBytes) { + for (pos = 0; pos < size; pos += nBytes) { if (pos) O << ", "; if (pos == nextSymbolPos) { @@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } - else if (ConstantExpr *Cexpr = - dyn_cast<ConstantExpr>(v)) { + } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); nSym++; if (nSym >= numSymbols) - nextSymbolPos = size+1; + nextSymbolPos = size + 1; else nextSymbolPos = symbolPosInBuffer[nSym]; - } else - if (nBytes == 4) - O << *(unsigned int*)(buffer+pos); - else - O << *(unsigned long long*)(buffer+pos); + } else if (nBytes == 4) + O << *(unsigned int *)(buffer + pos); + else + O << *(unsigned long long *)(buffer + pos); } } } @@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { virtual void emitSrcInText(StringRef filename, unsigned line); -private : - virtual const char *getPassName() const { - return "NVPTX Assembly Printer"; - } +private: + virtual const char *getPassName() const { return "NVPTX Assembly Printer"; } const Function *F; std::string CurrentFnName; @@ -207,31 +195,28 @@ private : void printGlobalVariable(const GlobalVariable *GVar); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); - void printVecModifiedImmediate(const MachineOperand &MO, - const char *Modifier, raw_ostream &O); + const char *Modifier = 0); + void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, + raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier=0); + const char *Modifier = 0); void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O, - bool=false); + void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitHeader(Module &M, raw_ostream &O); - void emitKernelFunctionDirectives(const Function& F, - raw_ostream &O) const; + void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, - unsigned &FrameSize); + void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); bool isImageType(const Type *Ty); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, @@ -269,17 +254,16 @@ private: void recordAndEmitFilenames(Module &); void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); - void emitPTXAddressSpace(unsigned int AddressSpace, - raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ; - void printScalarConstant(Constant *CPV, raw_ostream &O) ; - void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ; - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ; - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ; + void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; + std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + void printScalarConstant(Constant *CPV, raw_ostream &O); + void printFPConstant(const ConstantFP *Fp, raw_ostream &O); + void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue* V, raw_ostream &O); + void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void emitDeclarations(Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); @@ -289,10 +273,9 @@ private: LineReader *reader; LineReader *getReader(std::string); public: - NVPTXAsmPrinter(TargetMachine &TM, - MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { CurrentBankselLabelInBasicBlock = ""; VRidGlobal2LocalMap = NULL; reader = NULL; diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index bb2c55ceed..6533da5102 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -25,9 +25,7 @@ using namespace llvm; -bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { - return true; -} +bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFrameInfo()->hasStackObjects()) { @@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { // mov %SPL, %depot; // cvta.local %SP, %SPL; if (is64bit) { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } else { - MachineInstr *MI = BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); - BuildMI(MBB, MI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal) - .addReg(NVPTX::VRDepot); + MachineInstr *MI = BuildMI( + MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal); + BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot); } - } - else { + } else { // mov %SP, %depot; if (is64bit) - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); else - BuildMI(MBB, MBBI, dl, - tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame) - .addReg(NVPTX::VRDepot); + BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr), + NVPTX::VRFrame).addReg(NVPTX::VRDepot); } } } void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { -} + MachineBasicBlock &MBB) const {} // This function eliminates ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions -void NVPTXFrameLowering:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { +void NVPTXFrameLowering::eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { // Simply discard ADJCALLSTACKDOWN, // ADJCALLSTACKUP instructions. MBB.erase(I); } - diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index d34e7bec1d..819f1dd3f4 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -16,7 +16,6 @@ #include "llvm/Target/TargetFrameLowering.h" - namespace llvm { class NVPTXTargetMachine; @@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering { public: explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), - tm(_tm), is64bit(_is64bit) {} + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm), + is64bit(_is64bit) {} virtual bool hasFP(const MachineFunction &MF) const; virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const; + virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 481f13afd1..e862988c85 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "NVPTXISelDAGToDAG.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instructions.h" @@ -26,27 +25,22 @@ using namespace llvm; - -static cl::opt<bool> -UseFMADInstruction("nvptx-mad-enable", - cl::ZeroOrMore, - cl::desc("NVPTX Specific: Enable generating FMAD instructions"), - cl::init(false)); +static cl::opt<bool> UseFMADInstruction( + "nvptx-mad-enable", cl::ZeroOrMore, + cl::desc("NVPTX Specific: Enable generating FMAD instructions"), + cl::init(false)); static cl::opt<int> -FMAContractLevel("nvptx-fma-level", - cl::ZeroOrMore, +FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); - + " 1: do it 2: do it aggressively"), + cl::init(2)); -static cl::opt<int> -UsePrecDivF32("nvptx-prec-divf32", - cl::ZeroOrMore, - cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" - " IEEE Compliant F32 div.rnd if avaiable."), - cl::init(2)); +static cl::opt<int> UsePrecDivF32( + "nvptx-prec-divf32", cl::ZeroOrMore, + cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use" + " IEEE Compliant F32 div.rnd if avaiable."), + cl::init(2)); /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. @@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, return new NVPTXDAGToDAGISel(TM, OptLevel); } - NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, CodeGenOpt::Level OptLevel) -: SelectionDAGISel(tm, OptLevel), - Subtarget(tm.getSubtarget<NVPTXSubtarget>()) -{ + : SelectionDAGISel(tm, OptLevel), + Subtarget(tm.getSubtarget<NVPTXSubtarget>()) { // Always do fma.f32 fpcontract if the target supports the instruction. // Always do fma.f64 fpcontract if the target supports the instruction. // Do mad.f32 is nvptx-mad-enable is specified and the target does not // support fma.f32. doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32(); - doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel>=1); - doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel>=1); - doFMAF32AGG = (OptLevel > 0) && Subtarget.hasFMAF32() && - (FMAContractLevel==2); - doFMAF64AGG = (OptLevel > 0) && Subtarget.hasFMAF64() && - (FMAContractLevel==2); + doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1); + doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1); + doFMAF32AGG = + (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2); + doFMAF64AGG = + (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2); allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction; @@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm, /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. -SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) - return NULL; // Already selected. + return NULL; // Already selected. SDNode *ResNode = NULL; switch (N->getOpcode()) { @@ -119,30 +109,34 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreV4: ResNode = SelectStoreVector(N); break; - default: break; + default: + break; } if (ResNode) return ResNode; return SelectCode(N); } - -static unsigned int -getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) -{ +static unsigned int getCodeAddrSpace(MemSDNode *N, + const NVPTXSubtarget &Subtarget) { const Value *Src = N->getSrcValue(); if (!Src) return NVPTX::PTXLdStInstCode::LOCAL; if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) { switch (PT->getAddressSpace()) { - case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL; - case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL; - case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED; + case llvm::ADDRESS_SPACE_LOCAL: + return NVPTX::PTXLdStInstCode::LOCAL; + case llvm::ADDRESS_SPACE_GLOBAL: + return NVPTX::PTXLdStInstCode::GLOBAL; + case llvm::ADDRESS_SPACE_SHARED: + return NVPTX::PTXLdStInstCode::SHARED; case llvm::ADDRESS_SPACE_CONST_NOT_GEN: return NVPTX::PTXLdStInstCode::CONSTANT; - case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC; - case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM; + case llvm::ADDRESS_SPACE_GENERIC: + return NVPTX::PTXLdStInstCode::GENERIC; + case llvm::ADDRESS_SPACE_PARAM: + return NVPTX::PTXLdStInstCode::PARAM; case llvm::ADDRESS_SPACE_CONST: // If the arch supports generic address space, translate it to GLOBAL // for correctness. @@ -153,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) return NVPTX::PTXLdStInstCode::GLOBAL; else return NVPTX::PTXLdStInstCode::CONSTANT; - default: break; + default: + break; } } return NVPTX::PTXLdStInstCode::LOCAL; } - -SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { DebugLoc dl = N->getDebugLoc(); LoadSDNode *LD = cast<LoadSDNode>(N); EVT LoadedVT = LD->getMemoryVT(); - SDNode *NVPTXLD= NULL; + SDNode *NVPTXLD = NULL; // do not support pre/post inc/dec if (LD->isIndexed()) @@ -204,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned fromTypeWidth = ScalarVT.getSizeInBits(); + unsigned fromTypeWidth = ScalarVT.getSizeInBits(); unsigned int fromType; if ((LD->getExtensionType() == ISD::SEXTLOAD)) fromType = NVPTX::PTXLdStInstCode::Signed; @@ -223,105 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { if (SelectDirectAddr(N1, Addr)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_avar; break; - case MVT::i16: Opcode = NVPTX::LD_i16_avar; break; - case MVT::i32: Opcode = NVPTX::LD_i32_avar; break; - case MVT::i64: Opcode = NVPTX::LD_i64_avar; break; - case MVT::f32: Opcode = NVPTX::LD_f32_avar; break; - case MVT::f64: Opcode = NVPTX::LD_f64_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Addr, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N1.getNode(), N1, Base, Offset): - SelectADDRsi(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Addr, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) + : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_asi; break; - case MVT::i16: Opcode = NVPTX::LD_i16_asi; break; - case MVT::i32: Opcode = NVPTX::LD_i32_asi; break; - case MVT::i64: Opcode = NVPTX::LD_i64_asi; break; - case MVT::f32: Opcode = NVPTX::LD_f32_asi; break; - case MVT::f64: Opcode = NVPTX::LD_f64_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N1.getNode(), N1, Base, Offset): - SelectADDRri(N1.getNode(), N1, Base, Offset)) { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N1.getNode(), N1, Base, Offset) + : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari_64; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari_64; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari_64; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari_64; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari_64; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari_64; + break; + default: + return NULL; } } else { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_ari; break; - case MVT::i16: Opcode = NVPTX::LD_i16_ari; break; - case MVT::i32: Opcode = NVPTX::LD_i32_ari; break; - case MVT::i64: Opcode = NVPTX::LD_i64_ari; break; - case MVT::f32: Opcode = NVPTX::LD_f32_ari; break; - case MVT::f64: Opcode = NVPTX::LD_f64_ari; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_ari; + break; + default: + return NULL; } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - Base, Offset, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 8); - } - else { + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), Base, Offset, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8); + } else { if (Subtarget.is64Bit()) { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg_64; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg_64; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg_64; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg_64; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg_64; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg_64; + break; + default: + return NULL; } } else { switch (TargetVT) { - case MVT::i8: Opcode = NVPTX::LD_i8_areg; break; - case MVT::i16: Opcode = NVPTX::LD_i16_areg; break; - case MVT::i32: Opcode = NVPTX::LD_i32_areg; break; - case MVT::i64: Opcode = NVPTX::LD_i64_areg; break; - case MVT::f32: Opcode = NVPTX::LD_f32_areg; break; - case MVT::f64: Opcode = NVPTX::LD_f64_areg; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::LD_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::LD_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::LD_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::LD_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::LD_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::LD_f64_areg; + break; + default: + return NULL; } } - SDValue Ops[] = { getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(fromType), - getI32Imm(fromTypeWidth), - N1, Chain }; - NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, - MVT::Other, Ops, 7); + SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(fromType), + getI32Imm(fromTypeWidth), N1, Chain }; + NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7); } if (NVPTXLD != NULL) { @@ -344,9 +399,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { MemSDNode *MemSD = cast<MemSDNode>(N); EVT LoadedVT = MemSD->getMemoryVT(); - if (!LoadedVT.isSimple()) - return NULL; + return NULL; // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); @@ -369,11 +423,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { // type is integer // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float MVT ScalarVT = SimpleVT.getScalarType(); - unsigned FromTypeWidth = ScalarVT.getSizeInBits(); + unsigned FromTypeWidth = ScalarVT.getSizeInBits(); unsigned int FromType; // The last operand holds the original LoadSDNode::getExtensionType() value - unsigned ExtensionType = - cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue(); + unsigned ExtensionType = cast<ConstantSDNode>( + N->getOperand(N->getNumOperands() - 1))->getZExtValue(); if (ExtensionType == ISD::SEXTLOAD) FromType = NVPTX::PTXLdStInstCode::Signed; else if (ScalarVT.isFloatingPoint()) @@ -384,197 +438,328 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { unsigned VecType; switch (N->getOpcode()) { - case NVPTXISD::LoadV2: VecType = NVPTX::PTXLdStInstCode::V2; break; - case NVPTXISD::LoadV4: VecType = NVPTX::PTXLdStInstCode::V4; break; - default: return NULL; + case NVPTXISD::LoadV2: + VecType = NVPTX::PTXLdStInstCode::V2; + break; + case NVPTXISD::LoadV4: + VecType = NVPTX::PTXLdStInstCode::V4; + break; + default: + return NULL; } EVT EltVT = N->getValueType(0); if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_avar; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_avar; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_avar; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_avar; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_avar; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_avar; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_avar; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_avar; + break; } break; } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Addr, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Addr, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(Op1.getNode(), Op1, Base, Offset): - SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_asi; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_asi; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_asi; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_asi; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_asi; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_asi; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_asi; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_asi; + break; } break; } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRri64(Op1.getNode(), Op1, Base, Offset): - SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset) + : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari_64; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari_64; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari_64; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_ari; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_ari; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_ari; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_ari; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_ari; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_ari; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_ari; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_ari; + break; } break; } } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Base, Offset, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Base, Offset, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg_64; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg_64; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg_64; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg_64; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg_64; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v2_areg; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v2_areg; break; - case MVT::i64: Opcode = NVPTX::LDV_i64_v2_areg; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v2_areg; break; - case MVT::f64: Opcode = NVPTX::LDV_f64_v2_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v2_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::LDV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::LDV_f64_v2_areg; + break; } break; case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; - case MVT::i16: Opcode = NVPTX::LDV_i16_v4_areg; break; - case MVT::i32: Opcode = NVPTX::LDV_i32_v4_areg; break; - case MVT::f32: Opcode = NVPTX::LDV_f32_v4_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::LDV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::LDV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::LDV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::LDV_f32_v4_areg; + break; } break; } } - SDValue Ops[] = { getI32Imm(IsVolatile), - getI32Imm(CodeAddrSpace), - getI32Imm(VecType), - getI32Imm(FromType), - getI32Imm(FromTypeWidth), - Op1, Chain }; + SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace), + getI32Imm(VecType), getI32Imm(FromType), + getI32Imm(FromTypeWidth), Op1, Chain }; LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7); } @@ -598,89 +783,179 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { // Select opcode if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LDGV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; + break; } break; case NVPTXISD::LDGV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; + break; } break; case NVPTXISD::LDUV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; + break; } break; case NVPTXISD::LDUV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::LDGV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; + break; } break; case NVPTXISD::LDGV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; + break; } break; case NVPTXISD::LDUV2: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break; - case MVT::i64: Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break; - case MVT::f64: Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; + break; } break; case NVPTXISD::LDUV4: switch (RetVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break; - case MVT::i16: Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break; - case MVT::i32: Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break; - case MVT::f32: Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; + break; } break; } @@ -696,8 +971,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { return LD; } - -SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { DebugLoc dl = N->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(N); EVT StoreVT = ST->getMemoryVT(); @@ -738,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { // - for integer type, always use 'u' // MVT ScalarVT = SimpleVT.getScalarType(); - unsigned toTypeWidth = ScalarVT.getSizeInBits(); + unsigned toTypeWidth = ScalarVT.getSizeInBits(); unsigned int toType; if (ScalarVT.isFloatingPoint()) toType = NVPTX::PTXLdStInstCode::Float; @@ -757,108 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_avar; break; - case MVT::i16: Opcode = NVPTX::ST_i16_avar; break; - case MVT::i32: Opcode = NVPTX::ST_i32_avar; break; - case MVT::i64: Opcode = NVPTX::ST_i64_avar; break; - case MVT::f32: Opcode = NVPTX::ST_f32_avar; break; - case MVT::f64: Opcode = NVPTX::ST_f64_avar; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_avar; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_avar; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_avar; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_avar; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_avar; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_avar; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Addr, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Addr, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_asi; break; - case MVT::i16: Opcode = NVPTX::ST_i16_asi; break; - case MVT::i32: Opcode = NVPTX::ST_i32_asi; break; - case MVT::i64: Opcode = NVPTX::ST_i64_asi; break; - case MVT::f32: Opcode = NVPTX::ST_f32_asi; break; - case MVT::f64: Opcode = NVPTX::ST_f64_asi; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_asi; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_asi; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_asi; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_asi; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_asi; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_asi; + break; + default: + return NULL; } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari_64; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari_64; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari_64; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari_64; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari_64; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari_64; + break; + default: + return NULL; } } else { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_ari; break; - case MVT::i16: Opcode = NVPTX::ST_i16_ari; break; - case MVT::i32: Opcode = NVPTX::ST_i32_ari; break; - case MVT::i64: Opcode = NVPTX::ST_i64_ari; break; - case MVT::f32: Opcode = NVPTX::ST_f32_ari; break; - case MVT::f64: Opcode = NVPTX::ST_f64_ari; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_ari; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_ari; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_ari; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_ari; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_ari; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_ari; + break; + default: + return NULL; } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - Base, Offset, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 9); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), Base, Offset, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9); } else { if (Subtarget.is64Bit()) { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg_64; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg_64; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg_64; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg_64; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg_64; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg_64; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg_64; + break; + default: + return NULL; } } else { switch (SourceVT) { - case MVT::i8: Opcode = NVPTX::ST_i8_areg; break; - case MVT::i16: Opcode = NVPTX::ST_i16_areg; break; - case MVT::i32: Opcode = NVPTX::ST_i32_areg; break; - case MVT::i64: Opcode = NVPTX::ST_i64_areg; break; - case MVT::f32: Opcode = NVPTX::ST_f32_areg; break; - case MVT::f64: Opcode = NVPTX::ST_f64_areg; break; - default: return NULL; + case MVT::i8: + Opcode = NVPTX::ST_i8_areg; + break; + case MVT::i16: + Opcode = NVPTX::ST_i16_areg; + break; + case MVT::i32: + Opcode = NVPTX::ST_i32_areg; + break; + case MVT::i64: + Opcode = NVPTX::ST_i64_areg; + break; + case MVT::f32: + Opcode = NVPTX::ST_f32_areg; + break; + case MVT::f64: + Opcode = NVPTX::ST_f64_areg; + break; + default: + return NULL; } } - SDValue Ops[] = { N1, - getI32Imm(isVolatile), - getI32Imm(codeAddrSpace), - getI32Imm(vecType), - getI32Imm(toType), - getI32Imm(toTypeWidth), - N2, Chain }; - NVPTXST = CurDAG->getMachineNode(Opcode, dl, - MVT::Other, Ops, 8); + SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), + getI32Imm(vecType), getI32Imm(toType), + getI32Imm(toTypeWidth), N2, Chain }; + NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8); } if (NVPTXST != NULL) { @@ -901,14 +1233,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { // - for integer type, always use 'u' assert(StoreVT.isSimple() && "Store value is not simple"); MVT ScalarVT = StoreVT.getSimpleVT().getScalarType(); - unsigned ToTypeWidth = ScalarVT.getSizeInBits(); + unsigned ToTypeWidth = ScalarVT.getSizeInBits(); unsigned ToType; if (ScalarVT.isFloatingPoint()) ToType = NVPTX::PTXLdStInstCode::Float; else ToType = NVPTX::PTXLdStInstCode::Unsigned; - SmallVector<SDValue, 12> StOps; SDValue N2; unsigned VecType; @@ -928,7 +1259,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { StOps.push_back(N->getOperand(4)); N2 = N->getOperand(5); break; - default: return NULL; + default: + return NULL; } StOps.push_back(getI32Imm(IsVolatile)); @@ -939,105 +1271,197 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_avar; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_avar; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_avar; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_avar; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_avar; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_avar; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_avar; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_avar; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_avar; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_avar; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_avar; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_avar; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_avar; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_avar; + break; } break; } StOps.push_back(Addr); - } else if (Subtarget.is64Bit()? - SelectADDRsi64(N2.getNode(), N2, Base, Offset): - SelectADDRsi(N2.getNode(), N2, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRsi64(N2.getNode(), N2, Base, Offset) + : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_asi; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_asi; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_asi; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_asi; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_asi; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_asi; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_asi; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_asi; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_asi; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_asi; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_asi; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_asi; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_asi; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_asi; + break; } break; } StOps.push_back(Base); StOps.push_back(Offset); - } else if (Subtarget.is64Bit()? - SelectADDRri64(N2.getNode(), N2, Base, Offset): - SelectADDRri(N2.getNode(), N2, Base, Offset)) { + } else if (Subtarget.is64Bit() + ? SelectADDRri64(N2.getNode(), N2, Base, Offset) + : SelectADDRri(N2.getNode(), N2, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari_64; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari_64; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari_64; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_ari; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_ari; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_ari; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_ari; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_ari; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_ari; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_ari; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_ari; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_ari; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_ari; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_ari; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_ari; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_ari; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_ari; + break; } break; } @@ -1047,49 +1471,95 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg_64; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg_64; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg_64; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg_64; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg_64; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg_64; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg_64; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg_64; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg_64; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg_64; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg_64; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg_64; + break; } break; } } else { switch (N->getOpcode()) { - default: return NULL; + default: + return NULL; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v2_areg; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v2_areg; break; - case MVT::i64: Opcode = NVPTX::STV_i64_v2_areg; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v2_areg; break; - case MVT::f64: Opcode = NVPTX::STV_f64_v2_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v2_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v2_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v2_areg; + break; + case MVT::i64: + Opcode = NVPTX::STV_i64_v2_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v2_areg; + break; + case MVT::f64: + Opcode = NVPTX::STV_f64_v2_areg; + break; } break; case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { - default: return NULL; - case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; - case MVT::i16: Opcode = NVPTX::STV_i16_v4_areg; break; - case MVT::i32: Opcode = NVPTX::STV_i32_v4_areg; break; - case MVT::f32: Opcode = NVPTX::STV_f32_v4_areg; break; + default: + return NULL; + case MVT::i8: + Opcode = NVPTX::STV_i8_v4_areg; + break; + case MVT::i16: + Opcode = NVPTX::STV_i16_v4_areg; + break; + case MVT::i32: + Opcode = NVPTX::STV_i32_v4_areg; + break; + case MVT::f32: + Opcode = NVPTX::STV_f32_v4_areg; + break; } break; } @@ -1112,8 +1582,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { // Return true if TGA or ES. - if (N.getOpcode() == ISD::TargetGlobalAddress - || N.getOpcode() == ISD::TargetExternalSymbol) { + if (N.getOpcode() == ISD::TargetGlobalAddress || + N.getOpcode() == ISD::TargetExternalSymbol) { Address = N; return true; } @@ -1131,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { } // symbol+offset -bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRsi_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - SDValue base=Addr.getOperand(0); + SDValue base = Addr.getOperand(0); if (SelectDirectAddr(base, Base)) { Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt); return true; @@ -1159,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr, } // register+offset -bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, - SDValue &Base, SDValue &Offset, - MVT mvt) { +bool NVPTXDAGToDAGISel::SelectADDRri_imp( + SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) { if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); Offset = CurDAG->getTargetConstant(0, mvt); @@ -1169,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress) - return false; // direct calls. + return false; // direct calls. if (Addr.getOpcode() == ISD::ADD) { if (SelectDirectAddr(Addr.getOperand(0), Addr)) { @@ -1177,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr, } if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { if (FrameIndexSDNode *FIN = - dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) + dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) // Constant offset from frame ref. Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt); else @@ -1209,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, // (See SelectionDAGNodes.h). So we need to check for both. if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) { Src = mN->getSrcValue(); - } - else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { + } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) { Src = mN->getSrcValue(); } if (!Src) @@ -1222,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. -bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { +bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) { SDValue Op0, Op1; switch (ConstraintCode) { - default: return true; - case 'm': // memory + default: + return true; + case 'm': // memory if (SelectDirectAddr(Op, Op0)) { OutOps.push_back(Op0); OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32)); @@ -1251,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op, // pattern matcher inserts a bunch of IMOVi8rr to convert // the imm to i8imm, and this causes instruction selection // to fail. -bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, - SDValue &Retval) { - if (!(N.getOpcode() == ISD::UNDEF) && - !(N.getOpcode() == ISD::Constant)) +bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) { + if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant)) return false; if (N.getOpcode() == ISD::UNDEF) diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 4ec924117a..70e8e46429 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -64,11 +64,10 @@ public: const NVPTXSubtarget &Subtarget; - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps); + virtual bool SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps); private: - // Include the pieces autogenerated from the target description. +// Include the pieces autogenerated from the target description. #include "NVPTXGenDAGISel.inc" SDNode *Select(SDNode *N); @@ -99,7 +98,6 @@ private: bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset); - bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const; bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index e9a9fbfd04..6e01a5a820 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #include "NVPTXISelLowering.h" #include "NVPTX.h" #include "NVPTXTargetMachine.h" @@ -44,14 +43,14 @@ using namespace llvm; static unsigned int uniqueCallSite = 0; -static cl::opt<bool> -sched4reg("nvptx-sched4reg", - cl::desc("NVPTX Specific: schedule for register pressue"), - cl::init(false)); +static cl::opt<bool> sched4reg( + "nvptx-sched4reg", + cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false)); static bool IsPTXVectorType(MVT VT) { switch (VT.SimpleTy) { - default: return false; + default: + return false; case MVT::v2i8: case MVT::v4i8: case MVT::v2i16: @@ -62,22 +61,21 @@ static bool IsPTXVectorType(MVT VT) { case MVT::v2f32: case MVT::v4f32: case MVT::v2f64: - return true; + return true; } } // NVPTXTargetLowering Constructor. NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) -: TargetLowering(TM, new NVPTXTargetObjectFile()), - nvTM(&TM), - nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { + : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM), + nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) { // always lower memset, memcpy, and memmove intrinsics to load/store // instructions, rather // then generating calls to memset, mempcy or memmove. - MaxStoresPerMemset = (unsigned)0xFFFFFFFF; - MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF; - MaxStoresPerMemmove = (unsigned)0xFFFFFFFF; + MaxStoresPerMemset = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF; + MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -100,52 +98,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); // Operations not directly supported by NVPTX. - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::BR_CC, MVT::i8, Expand); - setOperationAction(ISD::BR_CC, MVT::i16, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::BR_CC, MVT::i8, Expand); + setOperationAction(ISD::BR_CC, MVT::i16, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (nvptxSubtarget.hasROT64()) { - setOperationAction(ISD::ROTL , MVT::i64, Legal); - setOperationAction(ISD::ROTR , MVT::i64, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i64, Expand); - setOperationAction(ISD::ROTR , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Legal); + setOperationAction(ISD::ROTR, MVT::i64, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); } if (nvptxSubtarget.hasROT32()) { - setOperationAction(ISD::ROTL , MVT::i32, Legal); - setOperationAction(ISD::ROTR , MVT::i32, Legal); - } - else { - setOperationAction(ISD::ROTL , MVT::i32, Expand); - setOperationAction(ISD::ROTR , MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTR, MVT::i32, Legal); + } else { + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Expand); } - setOperationAction(ISD::ROTL , MVT::i16, Expand); - setOperationAction(ISD::ROTR , MVT::i16, Expand); - setOperationAction(ISD::ROTL , MVT::i8, Expand); - setOperationAction(ISD::ROTR , MVT::i8, Expand); - setOperationAction(ISD::BSWAP , MVT::i16, Expand); - setOperationAction(ISD::BSWAP , MVT::i32, Expand); - setOperationAction(ISD::BSWAP , MVT::i64, Expand); + setOperationAction(ISD::ROTL, MVT::i16, Expand); + setOperationAction(ISD::ROTR, MVT::i16, Expand); + setOperationAction(ISD::ROTL, MVT::i8, Expand); + setOperationAction(ISD::ROTR, MVT::i8, Expand); + setOperationAction(ISD::BSWAP, MVT::i16, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); // Indirect branch is not supported. // This also disables Jump Table creation. - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::GlobalAddress , MVT::i32 , Custom); - setOperationAction(ISD::GlobalAddress , MVT::i64 , Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); // We want to legalize constant related memmove and memcopy // intrinsics. @@ -168,16 +164,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setTruncStoreAction(MVT::i8, MVT::i1, Expand); // This is legal in NVPTX - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // TRAP can be lowered to PTX trap - setOperationAction(ISD::TRAP, MVT::Other, Legal); + setOperationAction(ISD::TRAP, MVT::Other, Legal); // Register custom handling for vector loads/stores - for (int i = MVT::FIRST_VECTOR_VALUETYPE; - i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; + for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; + ++i) { + MVT VT = (MVT::SimpleValueType) i; if (IsPTXVectorType(VT)) { setOperationAction(ISD::LOAD, VT, Custom); setOperationAction(ISD::STORE, VT, Custom); @@ -190,49 +186,86 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) computeRegisterProperties(); } - const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; - case NVPTXISD::CALL: return "NVPTXISD::CALL"; - case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG"; - case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper"; - case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin"; - case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam"; + default: + return 0; + case NVPTXISD::CALL: + return "NVPTXISD::CALL"; + case NVPTXISD::RET_FLAG: + return "NVPTXISD::RET_FLAG"; + case NVPTXISD::Wrapper: + return "NVPTXISD::Wrapper"; + case NVPTXISD::NVBuiltin: + return "NVPTXISD::NVBuiltin"; + case NVPTXISD::DeclareParam: + return "NVPTXISD::DeclareParam"; case NVPTXISD::DeclareScalarParam: return "NVPTXISD::DeclareScalarParam"; - case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet"; - case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam"; - case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall"; - case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam"; - case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam"; - case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32"; - case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32"; - case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam"; - case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin"; - case NVPTXISD::CallArg: return "NVPTXISD::CallArg"; - case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg"; - case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd"; - case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid"; - case NVPTXISD::CallVal: return "NVPTXISD::CallVal"; - case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol"; - case NVPTXISD::Prototype: return "NVPTXISD::Prototype"; - case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam"; - case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval"; - case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval"; - case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval"; - case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam"; - case NVPTXISD::RETURN: return "NVPTXISD::RETURN"; - case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin"; - case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd"; - case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2"; - case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4"; - case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2"; - case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4"; - case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2"; - case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4"; - case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2"; - case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; + case NVPTXISD::DeclareRet: + return "NVPTXISD::DeclareRet"; + case NVPTXISD::DeclareRetParam: + return "NVPTXISD::DeclareRetParam"; + case NVPTXISD::PrintCall: + return "NVPTXISD::PrintCall"; + case NVPTXISD::LoadParam: + return "NVPTXISD::LoadParam"; + case NVPTXISD::StoreParam: + return "NVPTXISD::StoreParam"; + case NVPTXISD::StoreParamS32: + return "NVPTXISD::StoreParamS32"; + case NVPTXISD::StoreParamU32: + return "NVPTXISD::StoreParamU32"; + case NVPTXISD::MoveToParam: + return "NVPTXISD::MoveToParam"; + case NVPTXISD::CallArgBegin: + return "NVPTXISD::CallArgBegin"; + case NVPTXISD::CallArg: + return "NVPTXISD::CallArg"; + case NVPTXISD::LastCallArg: + return "NVPTXISD::LastCallArg"; + case NVPTXISD::CallArgEnd: + return "NVPTXISD::CallArgEnd"; + case NVPTXISD::CallVoid: + return "NVPTXISD::CallVoid"; + case NVPTXISD::CallVal: + return "NVPTXISD::CallVal"; + case NVPTXISD::CallSymbol: + return "NVPTXISD::CallSymbol"; + case NVPTXISD::Prototype: + return "NVPTXISD::Prototype"; + case NVPTXISD::MoveParam: + return "NVPTXISD::MoveParam"; + case NVPTXISD::MoveRetval: + return "NVPTXISD::MoveRetval"; + case NVPTXISD::MoveToRetval: + return "NVPTXISD::MoveToRetval"; + case NVPTXISD::StoreRetval: + return "NVPTXISD::StoreRetval"; + case NVPTXISD::PseudoUseParam: + return "NVPTXISD::PseudoUseParam"; + case NVPTXISD::RETURN: + return "NVPTXISD::RETURN"; + case NVPTXISD::CallSeqBegin: + return "NVPTXISD::CallSeqBegin"; + case NVPTXISD::CallSeqEnd: + return "NVPTXISD::CallSeqEnd"; + case NVPTXISD::LoadV2: + return "NVPTXISD::LoadV2"; + case NVPTXISD::LoadV4: + return "NVPTXISD::LoadV4"; + case NVPTXISD::LDGV2: + return "NVPTXISD::LDGV2"; + case NVPTXISD::LDGV4: + return "NVPTXISD::LDGV4"; + case NVPTXISD::LDUV2: + return "NVPTXISD::LDUV2"; + case NVPTXISD::LDUV4: + return "NVPTXISD::LDUV4"; + case NVPTXISD::StoreV2: + return "NVPTXISD::StoreV2"; + case NVPTXISD::StoreV4: + return "NVPTXISD::StoreV4"; } } @@ -248,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op); } -std::string NVPTXTargetLowering::getPrototype(Type *retTy, - const ArgListTy &Args, - const SmallVectorImpl<ISD::OutputArg> &Outs, - unsigned retAlignment) const { +std::string NVPTXTargetLowering::getPrototype( + Type *retTy, const ArgListTy &Args, + const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); @@ -267,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned size = 0; if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) { size = ITy->getBitWidth(); - if (size < 32) size = 32; - } - else { + if (size < 32) + size = 32; + } else { assert(retTy->isFloatingPointTy() && "Floating point type expected here"); size = retTy->getPrimitiveSizeInBits(); } O << ".param .b" << size << " _"; - } - else if (isa<PointerType>(retTy)) - O << ".param .b" << getPointerTy().getSizeInBits() - << " _"; + } else if (isa<PointerType>(retTy)) + O << ".param .b" << getPointerTy().getSizeInBits() << " _"; else { if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned totalsz = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { elems = vtparts[i].getVectorNumElements(); elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - totalsz += sz/8; + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + totalsz += sz / 8; } } - O << ".param .align " - << retAlignment - << " .b8 _[" - << totalsz << "]"; - } - else { - assert(false && - "Unknown return type"); + O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; + } else { + assert(false && "Unknown return type"); } } - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, retTy, vtparts); unsigned idx = 0; - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -322,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0, je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " _"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; ++idx; } - if (i < e-1) + if (i < e - 1) O << ", "; } } @@ -340,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, bool first = true; MVT thePointerTy = getPointerTy(); - for (unsigned i=0,e=Args.size(); i!=e; ++i) { + for (unsigned i = 0, e = Args.size(); i != e; ++i) { const Type *Ty = Args[i].Ty; if (!first) { O << ", "; @@ -351,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, unsigned sz = 0; if (isa<IntegerType>(Ty)) { sz = cast<IntegerType>(Ty)->getBitWidth(); - if (sz < 32) sz = 32; - } - else if (isa<PointerType>(Ty)) + if (sz < 32) + sz = 32; + } else if (isa<PointerType>(Ty)) sz = thePointerTy.getSizeInBits(); else sz = Ty->getPrimitiveSizeInBits(); @@ -365,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, continue; } const PointerType *PTy = dyn_cast<PointerType>(Ty); - assert(PTy && - "Param with byval attribute should be a pointer type"); + assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); if (isABI) { unsigned align = Outs[i].Flags.getByValAlign(); unsigned sz = getDataLayout()->getTypeAllocSize(ETy); - O << ".param .align " << align - << " .b8 "; + O << ".param .align " << align << " .b8 "; O << "_"; O << "[" << sz << "]"; continue; - } - else { + } else { SmallVector<EVT, 16> vtparts; ComputeValueVTs(*this, ETy, vtparts); - for (unsigned i=0,e=vtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; if (vtparts[i].isVector()) { @@ -389,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, elemtype = vtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; O << ".reg .b" << sz << " "; O << "_"; - if (j<je-1) O << ", "; + if (j < je - 1) + O << ", "; } - if (i<e-1) + if (i < e - 1) O << ", "; } continue; @@ -406,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy, return O.str(); } - -SDValue -NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { - SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; +SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + DebugLoc &dl = CLI.DL; SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; - SmallVector<SDValue, 32> &OutVals = CLI.OutVals; - SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.Args; - Type *retTy = CLI.RetTy; - ImmutableCallSite *CS = CLI.CS; + SmallVector<SDValue, 32> &OutVals = CLI.OutVals; + SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &isTailCall = CLI.IsTailCall; + ArgListTy &Args = CLI.Args; + Type *retTy = CLI.RetTy; + ImmutableCallSite *CS = CLI.CS; bool isABI = (nvptxSubtarget.getSmVersion() >= 20); SDValue tempChain = Chain; - Chain = DAG.getCALLSEQ_START(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true)); + Chain = + DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true)); SDValue InFlag = Chain.getValue(1); assert((Outs.size() == Args.size()) && @@ -434,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned paramCount = 0; // Declare the .params or .reg need to pass values // to the function - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { EVT VT = Outs[i].VT; if (Outs[i].Flags.isByVal() == false) { @@ -445,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isABI) isReg = 0; unsigned sz = VT.getSizeInBits(); - if (VT.isInteger() && (sz < 32)) sz = 32; + if (VT.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), - DAG.getConstant(isReg, MVT::i32), - InFlag }; + DAG.getConstant(isReg, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(0, MVT::i32), OutVals[i], InFlag }; + DAG.getConstant(0, MVT::i32), OutVals[i], + InFlag }; unsigned opcode = NVPTXISD::StoreParam; if (isReg) @@ -477,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector SmallVector<EVT, 16> vtparts; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); - assert(PTy && - "Type of a byval parameter should be pointer"); + assert(PTy && "Type of a byval parameter should be pointer"); ComputeValueVTs(*this, PTy->getElementType(), vtparts); if (isABI) { @@ -488,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // The ByValAlign in the Outs[i].Flags is alway set at this point, so we // don't need to // worry about natural alignment or not. See TargetLowering::LowerCallTo() - SDValue DeclareParamOps[] = { Chain, - DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(sz, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { + Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32), + DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), + InFlag + }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) sz = 8; - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), - OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(curOffset, MVT::i32), - theVal, InFlag }; + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(curOffset, MVT::i32), + theVal, InFlag }; Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs, CopyParamOps, 5); InFlag = Chain.getValue(1); - curOffset += sz/8; + curOffset += sz / 8; } } ++paramCount; @@ -530,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Non-abi, struct or vector // Declare a bunch or .reg .b<size> .param<n> unsigned curOffset = 0; - for (unsigned j=0,je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned elems = 1; EVT elemtype = vtparts[j]; if (vtparts[j].isVector()) { elems = vtparts[j].getVectorNumElements(); elemtype = vtparts[j].getVectorElementType(); } - for (unsigned k=0,ke=elems; k!=ke; ++k) { + for (unsigned k = 0, ke = elems; k != ke; ++k) { unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 32)) sz = 32; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount, - MVT::i32), - DAG.getConstant(sz, MVT::i32), - DAG.getConstant(1, MVT::i32), - InFlag }; + SDValue DeclareParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(sz, MVT::i32), + DAG.getConstant(1, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, DeclareParamOps, 5); InFlag = Chain.getValue(1); - SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], - DAG.getConstant(curOffset, - getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, 0); + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = + DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(), + false, false, false, 0); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(0, MVT::i32), theVal, @@ -578,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or // individual .reg .b<size> func_retval<0..> for non ABI unsigned resultsz = 0; - for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) { + for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = resvtparts[i]; if (resvtparts[i].isVector()) { elems = resvtparts[i].getVectorNumElements(); elemtype = resvtparts[i].getVectorElementType(); } - for (unsigned j=0,je=elems; j!=je; ++j) { + for (unsigned j = 0, je = elems; j != je; ++j) { unsigned sz = elemtype.getSizeInBits(); if (isABI == false) { - if (elemtype.isInteger() && (sz < 32)) sz = 32; - } - else { - if (elemtype.isInteger() && (sz < 8)) sz = 8; + if (elemtype.isInteger() && (sz < 32)) + sz = 32; + } else { + if (elemtype.isInteger() && (sz < 8)) + sz = 8; } if (isABI == false) { SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); @@ -609,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (isABI) { if (retTy->isPrimitiveType() || retTy->isIntegerTy() || - retTy->isPointerTy() ) { + retTy->isPointerTy()) { // Scalar needs to be at least 32bit wide if (resultsz < 32) resultsz = 32; @@ -620,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); - } - else { + } else { if (Func) { // direct call if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment)) retAlignment = getDataLayout()->getABITypeAlignment(retTy); @@ -631,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, retAlignment = getDataLayout()->getABITypeAlignment(retTy); } SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment, - MVT::i32), - DAG.getConstant(resultsz/8, MVT::i32), - DAG.getConstant(0, MVT::i32), InFlag }; + SDValue DeclareRetOps[] = { Chain, + DAG.getConstant(retAlignment, MVT::i32), + DAG.getConstant(resultsz / 8, MVT::i32), + DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, DeclareRetOps, 5); InFlag = Chain.getValue(1); @@ -652,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // INLINEASM SDNode. SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue); std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment); - const char *asmstr = nvTM->getManagedStrPool()-> - getManagedString(proto_string.c_str())->c_str(); - SDValue InlineAsmOps[] = { Chain, - DAG.getTargetExternalSymbol(asmstr, - getPointerTy()), - DAG.getMDNode(0), - DAG.getTargetConstant(0, MVT::i32), InFlag }; + const char *asmstr = nvTM->getManagedStrPool() + ->getManagedString(proto_string.c_str())->c_str(); + SDValue InlineAsmOps[] = { + Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()), + DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag + }; Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5); InFlag = Chain.getValue(1); } // Op to just print "call" SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrintCallOps[] = { Chain, - DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1) - : retCount, MVT::i32), - InFlag }; - Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl, - PrintCallVTs, PrintCallOps, 3); + SDValue PrintCallOps[] = { + Chain, + DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32), + InFlag + }; + Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), + dl, PrintCallVTs, PrintCallOps, 3); InFlag = Chain.getValue(1); // Ops to print out the function name @@ -685,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallArgBeginOps, 2); InFlag = Chain.getValue(1); - for (unsigned i=0, e=paramCount; i!=e; ++i) { + for (unsigned i = 0, e = paramCount; i != e; ++i) { unsigned opcode; - if (i==(e-1)) + if (i == (e - 1)) opcode = NVPTXISD::LastCallArg; else opcode = NVPTXISD::CallArg; SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), - DAG.getConstant(i, MVT::i32), - InFlag }; + DAG.getConstant(i, MVT::i32), InFlag }; Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); InFlag = Chain.getValue(1); } SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CallArgEndOps[] = { Chain, - DAG.getConstant(Func ? 1 : 0, MVT::i32), + SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, - 3); + Chain = + DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); InFlag = Chain.getValue(1); if (!Func) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue PrototypeOps[] = { Chain, - DAG.getConstant(uniqueCallSite, MVT::i32), + SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); InFlag = Chain.getValue(1); @@ -719,32 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Ins.size() > 0) { if (isABI) { unsigned resoffset = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = Ins[i].VT.getSizeInBits(); - if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8; + if (Ins[i].VT.isInteger() && (sz < 8)) + sz = 8; EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue }; - SDValue LoadRetOps[] = { - Chain, - DAG.getConstant(1, MVT::i32), - DAG.getConstant(resoffset, MVT::i32), - InFlag - }; + SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32), + DAG.getConstant(resoffset, MVT::i32), InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs, LoadRetOps, array_lengthof(LoadRetOps)); Chain = retval.getValue(1); InFlag = retval.getValue(2); InVals.push_back(retval); - resoffset += sz/8; + resoffset += sz / 8; } - } - else { + } else { SmallVector<EVT, 16> resvtparts; ComputeValueVTs(*this, retTy, resvtparts); assert(Ins.size() == resvtparts.size() && "Unexpected number of return values in non-ABI case"); unsigned paramNum = 0; - for (unsigned i=0,e=Ins.size(); i!=e; ++i) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { assert(EVT(Ins[i].VT) == resvtparts[i] && "Unexpected EVT type in non-ABI case"); unsigned numelems = 1; @@ -754,14 +773,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, elemtype = Ins[i].VT.getVectorElementType(); } std::vector<SDValue> tempRetVals; - for (unsigned j=0; j<numelems; ++j) { + for (unsigned j = 0; j < numelems; ++j) { EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue }; - SDValue MoveRetOps[] = { - Chain, - DAG.getConstant(0, MVT::i32), - DAG.getConstant(paramNum, MVT::i32), - InFlag - }; + SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32), + DAG.getConstant(paramNum, MVT::i32), + InFlag }; SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs, MoveRetOps, array_lengthof(MoveRetOps)); Chain = retval.getValue(1); @@ -777,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } } - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getIntPtrConstant(uniqueCallSite, true), - DAG.getIntPtrConstant(uniqueCallSite+1, true), + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true), + DAG.getIntPtrConstant(uniqueCallSite + 1, true), InFlag); uniqueCallSite++; @@ -792,45 +807,51 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack() // (see LegalizeDAG.cpp). This is slow and uses local memory. // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5 -SDValue NVPTXTargetLowering:: -LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); SmallVector<SDValue, 8> Ops; unsigned NumOperands = Node->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { + for (unsigned i = 0; i < NumOperands; ++i) { SDValue SubOp = Node->getOperand(i); EVT VVT = SubOp.getNode()->getValueType(0); EVT EltVT = VVT.getVectorElementType(); unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j=0; j < NumSubElem; ++j) { + for (unsigned j = 0; j < NumSubElem; ++j) { Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, DAG.getIntPtrConstant(j))); } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], + Ops.size()); } -SDValue NVPTXTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) const { +SDValue +NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::RETURNADDR: return SDValue(); - case ISD::FRAMEADDR: return SDValue(); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: return Op; + case ISD::RETURNADDR: + return SDValue(); + case ISD::FRAMEADDR: + return SDValue(); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::INTRINSIC_W_CHAIN: + return Op; case ISD::BUILD_VECTOR: case ISD::EXTRACT_SUBVECTOR: return Op; - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); + case ISD::STORE: + return LowerSTORE(Op, DAG); + case ISD::LOAD: + return LowerLOAD(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } } - SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType() == MVT::i1) return LowerLOADi1(Op, DAG); @@ -842,24 +863,22 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // => // v1 = ld i8* addr // v = trunc v1 to i1 -SDValue NVPTXTargetLowering:: -LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); LoadSDNode *LD = cast<LoadSDNode>(Node); DebugLoc dl = Node->getDebugLoc(); - assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ; + assert(LD->getExtensionType() == ISD::NON_EXTLOAD); assert(Node->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only"); - SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), - LD->getAlignment()); + SDValue newLD = + DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD); // The legalizer (the caller) is expecting two values from the legalized // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() // in LegalizeDAG.cpp which also uses MergeValues. - SDValue Ops[] = {result, LD->getChain()}; + SDValue Ops[] = { result, LD->getChain() }; return DAG.getMergeValues(Ops, 2, dl); } @@ -887,7 +906,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { if (!ValVT.isSimple()) return SDValue(); switch (ValVT.getSimpleVT().SimpleTy) { - default: return SDValue(); + default: + return SDValue(); case MVT::v2i8: case MVT::v2i16: case MVT::v2i32: @@ -914,7 +934,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { NeedExt = true; switch (NumElts) { - default: return SDValue(); + default: + return SDValue(); case 2: Opcode = NVPTXISD::StoreV2; break; @@ -947,11 +968,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { MemSDNode *MemSD = cast<MemSDNode>(N); - SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL, - DAG.getVTList(MVT::Other), &Ops[0], - Ops.size(), MemSD->getMemoryVT(), - MemSD->getMemOperand()); - + SDValue NewSt = DAG.getMemIntrinsicNode( + Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); //return DCI.CombineTo(N, NewSt, true); return NewSt; @@ -964,8 +983,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { // => // v1 = zxt v to i8 // st i8, addr -SDValue NVPTXTargetLowering:: -LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); StoreSDNode *ST = cast<StoreSDNode>(Node); @@ -976,18 +994,14 @@ LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, - MVT::i8, Tmp3); - SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3); + SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); return Result; } - -SDValue -NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx, - EVT v) const { +SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, + int idx, EVT v) const { std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); std::stringstream suffix; suffix << idx; @@ -1000,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return getExtSymb(DAG, ".PARAM", idx, v); } -SDValue -NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { +SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { return getExtSymb(DAG, ".HLPPARAM", idx); } // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { - static const char *const specialTypes[] = { - "struct._image2d_t", - "struct._image3d_t", - "struct._sampler_t" - }; + static const char *const specialTypes[] = { "struct._image2d_t", + "struct._image3d_t", + "struct._sampler_t" }; const Type *Ty = arg->getType(); const PointerType *PTy = dyn_cast<PointerType>(Ty); @@ -1033,12 +1044,10 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { return false; } -SDValue -NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { +SDValue NVPTXTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); const DataLayout *TD = getDataLayout(); @@ -1054,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, std::vector<Type *> argTypes; std::vector<const Argument *> theArgs; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { + I != E; ++I) { theArgs.push_back(I); argTypes.push_back(I->getType()); } - assert(argTypes.size() == Ins.size() && - "Ins types and function types did not match"); + //assert(argTypes.size() == Ins.size() && + // "Ins types and function types did not match"); int idx = 0; - for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) { + for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) { Type *Ty = argTypes[i]; EVT ObjectVT = getValueType(Ty); - assert(ObjectVT == Ins[i].VT && - "Ins type did not match function type"); + //assert(ObjectVT == Ins[i].VT && + // "Ins type did not match function type"); // If the kernel argument is image*_t or sampler_t, convert it to // a i32 constant holding the parameter position. This can later // matched in the AsmPrinter to output the correct mangled name. - if (isImageOrSamplerVal(theArgs[i], - (theArgs[i]->getParent() ? - theArgs[i]->getParent()->getParent() : 0))) { + if (isImageOrSamplerVal( + theArgs[i], + (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() + : 0))) { assert(isKernel && "Only kernels can have image/sampler params"); - InVals.push_back(DAG.getConstant(i+1, MVT::i32)); + InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); continue; } if (theArgs[i]->use_empty()) { // argument is dead - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + if (ObjectVT.isVector()) { + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned NumElts = ObjectVT.getVectorNumElements(); + for (unsigned vi = 0; vi < NumElts; ++vi) { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT)); + } + } else { + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT)); + } continue; } @@ -1089,31 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, // to newly created nodes. The SDNOdes for params have to // appear in the same order as their order of appearance // in the original function. "idx+1" holds that order. - if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) { + if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) { + if (ObjectVT.isVector()) { + unsigned NumElts = ObjectVT.getVectorNumElements(); + EVT EltVT = ObjectVT.getVectorElementType(); + unsigned Offset = 0; + for (unsigned vi = 0; vi < NumElts; ++vi) { + SDValue A = getParamSymbol(DAG, idx, getPointerTy()); + SDValue B = DAG.getIntPtrConstant(Offset); + SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), + //getParamSymbol(DAG, idx, EltVT), + //DAG.getConstant(Offset, getPointerTy())); + A, B); + Value *SrcValue = Constant::getNullValue(PointerType::get( + EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM)); + SDValue Ld = DAG.getLoad( + EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false, + false, + TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext()))); + Offset += EltVT.getStoreSizeInBits() / 8; + InVals.push_back(Ld); + } + continue; + } + // A plain scalar. if (isABI || isKernel) { // If ABI, load from the param symbol SDValue Arg = getParamSymbol(DAG, idx); // Conjure up a value that we can get the address space from. // FIXME: Using a constant here is a hack. - Value *srcValue = Constant::getNullValue(PointerType::get( - ObjectVT.getTypeForEVT(F->getContext()), - llvm::ADDRESS_SPACE_PARAM)); - SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg, - MachinePointerInfo(srcValue), false, false, - false, - TD->getABITypeAlignment(ObjectVT.getTypeForEVT( - F->getContext()))); + Value *srcValue = Constant::getNullValue( + PointerType::get(ObjectVT.getTypeForEVT(F->getContext()), + llvm::ADDRESS_SPACE_PARAM)); + SDValue p = DAG.getLoad( + ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false, + false, + TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext()))); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); - } - else { + } else { // If no ABI, just move the param symbol SDValue Arg = getParamSymbol(DAG, idx, ObjectVT); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); InVals.push_back(p); } continue; @@ -1130,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = getParamSymbol(DAG, idx, getPointerTy()); SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg); if (p.getNode()) - DAG.AssignOrdering(p.getNode(), idx+1); + DAG.AssignOrdering(p.getNode(), idx + 1); if (isKernel) InVals.push_back(p); else { - SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, - DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), - p); + SDValue p2 = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT, + DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p); InVals.push_back(p2); } } else { // Have to move a set of param symbols to registers and // store them locally and return the local pointer in InVals const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]); - assert(elemPtrType && - "Byval parameter should be a pointer type"); + assert(elemPtrType && "Byval parameter should be a pointer type"); Type *elemType = elemPtrType->getElementType(); // Compute the constituent parts SmallVector<EVT, 16> vtparts; SmallVector<uint64_t, 16> offsets; ComputeValueVTs(*this, elemType, vtparts, &offsets, 0); unsigned totalsize = 0; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) totalsize += vtparts[j].getStoreSizeInBits(); - SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()-> - CreateStackObject(totalsize/8, 16, false), - getPointerTy()); + SDValue localcopy = DAG.getFrameIndex( + MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false), + getPointerTy()); unsigned sizesofar = 0; std::vector<SDValue> theChains; - for (unsigned j=0, je=vtparts.size(); j!=je; ++j) { + for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { unsigned numElems = 1; - if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements(); - for (unsigned k=0, ke=numElems; k!=ke; ++k) { + if (vtparts[j].isVector()) + numElems = vtparts[j].getVectorNumElements(); + for (unsigned k = 0, ke = numElems; k != ke; ++k) { EVT tmpvt = vtparts[j]; - if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType(); + if (tmpvt.isVector()) + tmpvt = tmpvt.getVectorElementType(); SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt, getParamSymbol(DAG, idx, tmpvt)); - SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, - DAG.getConstant(sizesofar, getPointerTy())); - theChains.push_back(DAG.getStore(Chain, dl, arg, addr, - MachinePointerInfo(), false, false, 0)); - sizesofar += tmpvt.getStoreSizeInBits()/8; + SDValue addr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy, + DAG.getConstant(sizesofar, getPointerTy())); + theChains.push_back(DAG.getStore( + Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0)); + sizesofar += tmpvt.getStoreSizeInBits() / 8; ++idx; } } @@ -1190,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain, //} if (!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size())); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], + OutChains.size())); return Chain; } -SDValue -NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { +SDValue NVPTXTargetLowering::LowerReturn( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, + SelectionDAG &DAG) const { bool isABI = (nvptxSubtarget.getSmVersion() >= 20); unsigned sizesofar = 0; unsigned idx = 0; - for (unsigned i=0, e=Outs.size(); i!=e; ++i) { + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; EVT theValType = theVal.getValueType(); unsigned numElems = 1; - if (theValType.isVector()) numElems = theValType.getVectorNumElements(); - for (unsigned j=0,je=numElems; j!=je; ++j) { + if (theValType.isVector()) + numElems = theValType.getVectorNumElements(); + for (unsigned j = 0, je = numElems; j != je; ++j) { SDValue tmpval = theVal; if (theValType.isVector()) tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - theValType.getVectorElementType(), - tmpval, DAG.getIntPtrConstant(j)); - Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval, - dl, MVT::Other, - Chain, - DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), + theValType.getVectorElementType(), tmpval, + DAG.getIntPtrConstant(j)); + Chain = DAG.getNode( + isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl, + MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32), tmpval); if (theValType.isVector()) - sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8; + sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8; else - sizesofar += theValType.getStoreSizeInBits()/8; + sizesofar += theValType.getStoreSizeInBits() / 8; ++idx; } } @@ -1234,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain); } -void -NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const -{ +void NVPTXTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { if (Constraint.length() > 1) return; else @@ -1249,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op, // NVPTX suuport vector of legal types of any length in Intrinsics because the // NVPTX specific type legalizer // will legalize them to the PTX supported length. -bool -NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { +bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { if (isTypeLegal(VT)) return true; if (VT.isVector()) { @@ -1261,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { return false; } - // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as // TgtMemIntrinsic // because we need the information that is only available in the "Value" type // of destination // pointer. In particular, the address space information. -bool -NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, - unsigned Intrinsic) const { +bool NVPTXTargetLowering::getTgtMemIntrinsic( + IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const { switch (Intrinsic) { default: return false; @@ -1325,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, /// Used to guide target specific optimizations, like loop strength reduction /// (LoopStrengthReduce.cpp) and memory optimization for address mode /// (CodeGenPrepare.cpp) -bool -NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { +bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { // AddrMode - This represents an addressing mode of: // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg @@ -1345,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM, } switch (AM.Scale) { - case 0: // "r", "r+i" or "i" is allowed + case 0: // "r", "r+i" or "i" is allowed break; case 1: - if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. + if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed. return false; // Otherwise we have r+i. break; @@ -1385,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { return TargetLowering::getConstraintType(Constraint); } - -std::pair<unsigned, const TargetRegisterClass*> +std::pair<unsigned, const TargetRegisterClass *> NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { if (Constraint.size() == 1) { @@ -1409,8 +1441,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } - - /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; @@ -1418,7 +1448,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, - SmallVectorImpl<SDValue>& Results) { + SmallVectorImpl<SDValue> &Results) { EVT ResVT = N->getValueType(0); DebugLoc DL = N->getDebugLoc(); @@ -1429,7 +1459,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, // but I'm leaving that as a TODO for now. assert(ResVT.isSimple() && "Can only handle simple types"); switch (ResVT.getSimpleVT().SimpleTy) { - default: return; + default: + return; case MVT::v2i8: case MVT::v2i16: case MVT::v2i32: @@ -1460,7 +1491,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDVTList LdResVTs; switch (NumElts) { - default: return; + default: + return; case 2: Opcode = NVPTXISD::LoadV2; LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); @@ -1500,14 +1532,14 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); Results.push_back(BuildVec); Results.push_back(LoadChain); } -static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, - SelectionDAG &DAG, +static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Results) { SDValue Chain = N->getOperand(0); SDValue Intrin = N->getOperand(1); @@ -1515,8 +1547,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // Get the intrinsic ID unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue(); - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1544,10 +1577,12 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SDVTList LdResVTs; switch (NumElts) { - default: return; + default: + return; case 2: - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1562,8 +1597,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other); break; case 4: { - switch(IntrinNo) { - default: return; + switch (IntrinNo) { + default: + return; case Intrinsic::nvvm_ldg_global_i: case Intrinsic::nvvm_ldg_global_f: case Intrinsic::nvvm_ldg_global_p: @@ -1586,29 +1622,31 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // Copy regular operands OtherOps.push_back(Chain); // Chain - // Skip operand 1 (intrinsic ID) - // Others + // Skip operand 1 (intrinsic ID) + // Others for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) OtherOps.push_back(N->getOperand(i)); MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); - SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], - OtherOps.size(), MemSD->getMemoryVT(), - MemSD->getMemOperand()); + SDValue NewLD = DAG.getMemIntrinsicNode( + Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), + MemSD->getMemoryVT(), MemSD->getMemOperand()); SmallVector<SDValue, 4> ScalarRes; for (unsigned i = 0; i < NumElts; ++i) { SDValue Res = NewLD.getValue(i); if (NeedTrunc) - Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); + Res = + DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res); ScalarRes.push_back(Res); } SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); Results.push_back(BuildVec); Results.push_back(LoadChain); @@ -1629,10 +1667,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, // We make sure the memory type is i8, which will be used during isel // to select the proper instruction. - SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, - LdResVTs, &Ops[0], - Ops.size(), MVT::i8, - MemSD->getMemOperand()); + SDValue NewLD = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], + Ops.size(), MVT::i8, MemSD->getMemOperand()); Results.push_back(NewLD.getValue(0)); Results.push_back(NewLD.getValue(1)); @@ -1641,11 +1678,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, } } -void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const { +void NVPTXTargetLowering::ReplaceNodeResults( + SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { - default: report_fatal_error("Unhandled custom legalization"); + default: + report_fatal_error("Unhandled custom legalization"); case ISD::LOAD: ReplaceLoadVector(N, DAG, Results); return; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 14afc148cb..3cd49d38af 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -87,7 +87,7 @@ public: bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I, + bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const; /// isLegalAddressingMode - Return true if the addressing mode represented @@ -107,14 +107,13 @@ public: } ConstraintType getConstraintType(const std::string &Constraint) const; - std::pair<unsigned, const TargetRegisterClass*> + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - virtual SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + virtual SDValue LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; @@ -136,17 +135,15 @@ public: NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::i32; - } + virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } virtual bool shouldSplitVectorElementType(EVT VT) const; private: - const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here + const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT = - MVT::i32) const; + SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, + EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const; SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); @@ -159,8 +156,7 @@ private: SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue> &Results, + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const; }; } // namespace llvm diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 9e73d80c28..33a63c26f4 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -23,61 +23,55 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include <cstdio> - using namespace llvm; // FIXME: Add the subtarget support on this constructor. NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) -: NVPTXGenInstrInfo(), - TM(tm), - RegInfo(*this, *TM.getSubtargetImpl()) {} - + : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {} -void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void NVPTXInstrInfo::copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const { if (NVPTX::Int32RegsRegClass.contains(DestReg) && NVPTX::Int32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int8RegsRegClass.contains(DestReg) && - NVPTX::Int8RegsRegClass.contains(SrcReg)) + NVPTX::Int8RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int1RegsRegClass.contains(DestReg) && - NVPTX::Int1RegsRegClass.contains(SrcReg)) + NVPTX::Int1RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float32RegsRegClass.contains(DestReg) && - NVPTX::Float32RegsRegClass.contains(SrcReg)) + NVPTX::Float32RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int16RegsRegClass.contains(DestReg) && - NVPTX::Int16RegsRegClass.contains(SrcReg)) + NVPTX::Int16RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Int64RegsRegClass.contains(DestReg) && - NVPTX::Int64RegsRegClass.contains(SrcReg)) + NVPTX::Int64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else if (NVPTX::Float64RegsRegClass.contains(DestReg) && - NVPTX::Float64RegsRegClass.contains(SrcReg)) + NVPTX::Float64RegsRegClass.contains(SrcReg)) BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)); else { llvm_unreachable("Don't know how to copy a register"); } } -bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, +bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const { // Look for the appropriate part of TSFlags bool isMove = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> - NVPTX::SimpleMoveShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift; isMove = (TSFlags == 1); if (isMove) { @@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const -{ +bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { switch (MI.getOpcode()) { - default: return false; + default: + return false; case NVPTX::INT_PTX_SREG_NTID_X: case NVPTX::INT_PTX_SREG_NTID_Y: case NVPTX::INT_PTX_SREG_NTID_Z: @@ -115,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const } } - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> - NVPTX::isLoadShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift; isLoad = (TSFlags == 1); if (isLoad) AddrSpace = getLdStCodeAddrSpace(MI); @@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isStore = false; - unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> - NVPTX::isStoreShift; + unsigned TSFlags = + (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift; isStore = (TSFlags == 1); if (isStore) AddrSpace = getLdStCodeAddrSpace(MI); return isStore; } - bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { unsigned addrspace = 0; if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS) @@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { return true; } - /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't /// implemented for a target). Upon success, this returns false and returns @@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const { /// Note that RemoveBranch and InsertBranch must be implemented to support /// cases where this method returns success. /// -bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { +bool NVPTXInstrInfo::AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) @@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineInstr *SecondLastInst = I; // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) return true; // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it. if (SecondLastInst->getOpcode() == NVPTX::CBranch && LastInst->getOpcode() == NVPTX::GOTO) { - TBB = SecondLastInst->getOperand(1).getMBB(); + TBB = SecondLastInst->getOperand(1).getMBB(); Cond.push_back(SecondLastInst->getOperand(0)); FBB = LastInst->getOperand(0).getMBB(); return false; @@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; + if (I == MBB.begin()) + return 0; --I; if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch) return 0; @@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { I = MBB.end(); - if (I == MBB.begin()) return 1; + if (I == MBB.begin()) + return 1; --I; if (I->getOpcode() != NVPTX::CBranch) return 1; @@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -unsigned -NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const { +unsigned NVPTXInstrInfo::InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. if (FBB == 0) { - if (Cond.empty()) // Unconditional branch + if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); - else // Conditional branch - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + else // Conditional branch + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()) + .addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, DL, get(NVPTX::CBranch)) - .addReg(Cond[0].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB); BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB); return 2; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 7b8e218b05..b1972e9b72 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -23,8 +23,7 @@ namespace llvm { -class NVPTXInstrInfo : public NVPTXGenInstrInfo -{ +class NVPTXInstrInfo : public NVPTXGenInstrInfo { NVPTXTargetMachine &TM; const NVPTXRegisterInfo RegInfo; public: @@ -50,30 +49,26 @@ public: * const TargetRegisterClass *RC) const; */ - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const ; - virtual bool isMoveInstr(const MachineInstr &MI, - unsigned &SrcReg, + virtual void copyPhysReg( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isReadSpecialReg(MachineInstr &MI) const; - virtual bool CanTailMerge(const MachineInstr *MI) const ; + virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; + virtual bool AnalyzeBranch( + MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; + virtual unsigned InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const { - return MI.getOperand(2).getImm(); + return MI.getOperand(2).getImm(); } }; diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index f7fa7aa61d..7c257b4c6a 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -25,18 +25,15 @@ using namespace llvm; -namespace llvm { -FunctionPass *createLowerAggrCopies(); -} +namespace llvm { FunctionPass *createLowerAggrCopies(); } char NVPTXLowerAggrCopies::ID = 0; // Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, - Value *dstAddr, Value *len, - //unsigned numLoads, - bool srcVolatile, bool dstVolatile, - LLVMContext &Context, Function &F) { +static void convertTransferToLoop( + Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, + //unsigned numLoads, + bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { Type *indType = len->getType(); BasicBlock *origBB = splitAt->getParent(); @@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr, // srcAddr and dstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = - dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); @@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, origBB->getTerminator()->setSuccessor(0, loopBB); IRBuilder<> builder(origBB, origBB->getTerminator()); - unsigned dstAS = - dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); + unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace(); // Cast pointer to the type of value getting stored - dstAddr = builder.CreateBitCast(dstAddr, - PointerType::get(val->getType(), dstAS)); + dstAddr = + builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); IRBuilder<> loop(loopBB); PHINode *ind = loop.CreatePHI(len->getType(), 0); @@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { //BasicBlock *bb = BI; for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - if (LoadInst * load = dyn_cast<LoadInst>(II)) { + ++II) { + if (LoadInst *load = dyn_cast<LoadInst>(II)) { - if (load->hasOneUse() == false) continue; + if (load->hasOneUse() == false) + continue; - if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue; + if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) + continue; User *use = *(load->use_begin()); - if (StoreInst * store = dyn_cast<StoreInst>(use)) { + if (StoreInst *store = dyn_cast<StoreInst>(use)) { if (store->getOperand(0) != load) //getValueOperand - continue; + continue; aggrLoads.push_back(load); } - } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) { + } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) { Value *len = intr->getLength(); // If the number of elements being copied is greater // than MaxAggrCopySize, lower it to a loop - if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemcpys.push_back(intr); } @@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // turn variable length memcpy/memmov into loop aggrMemcpys.push_back(intr); } - } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) { + } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) { Value *len = memsetintr->getLength(); - if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) { + if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) { if (len_int->getZExtValue() >= MaxAggrCopySize) { aggrMemsets.push_back(memsetintr); } @@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { } } } - if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) - && (aggrMemsets.size() == 0)) return false; + if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) && + (aggrMemsets.size() == 0)) + return false; // // Do the transformation of an aggr load/copy/set to a loop diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h index b4a4dbce98..a95c16b1e6 100644 --- a/lib/Target/NVPTX/NVPTXNumRegisters.h +++ b/lib/Target/NVPTX/NVPTXNumRegisters.h @@ -11,10 +11,6 @@ #ifndef NVPTX_NUM_REGISTERS_H #define NVPTX_NUM_REGISTERS_H -namespace llvm { - -const unsigned NVPTXNumRegisters = 396; - -} +namespace llvm { const unsigned NVPTXNumRegisters = 396; } #endif diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 350a2c5551..282465359b 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -23,69 +23,54 @@ #include "llvm/MC/MachineLocation.h" #include "llvm/Target/TargetInstrInfo.h" - using namespace llvm; -namespace llvm -{ -std::string getNVPTXRegClassName (TargetRegisterClass const *RC) { +namespace llvm { +std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return ".f32"; } if (RC == &NVPTX::Float64RegsRegClass) { return ".f64"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return ".s64"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return ".s32"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return ".s16"; } - // Int8Regs become 16-bit registers in PTX - else if (RC == &NVPTX::Int8RegsRegClass) { + // Int8Regs become 16-bit registers in PTX + else if (RC == &NVPTX::Int8RegsRegClass) { return ".s16"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return ".pred"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else { + } else { return "INTERNAL"; } return ""; } -std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { +std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { return "%f"; } if (RC == &NVPTX::Float64RegsRegClass) { return "%fd"; - } - else if (RC == &NVPTX::Int64RegsRegClass) { + } else if (RC == &NVPTX::Int64RegsRegClass) { return "%rd"; - } - else if (RC == &NVPTX::Int32RegsRegClass) { + } else if (RC == &NVPTX::Int32RegsRegClass) { return "%r"; - } - else if (RC == &NVPTX::Int16RegsRegClass) { + } else if (RC == &NVPTX::Int16RegsRegClass) { return "%rs"; - } - else if (RC == &NVPTX::Int8RegsRegClass) { + } else if (RC == &NVPTX::Int8RegsRegClass) { return "%rc"; - } - else if (RC == &NVPTX::Int1RegsRegClass) { + } else if (RC == &NVPTX::Int1RegsRegClass) { return "%p"; - } - else if (RC == &NVPTX::SpecialRegsRegClass) { + } else if (RC == &NVPTX::SpecialRegsRegClass) { return "!Special!"; - } - else { + } else { return "INTERNAL"; } return ""; @@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) { NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st) - : NVPTXGenRegisterInfo(0), - Is64Bit(st.is64Bit()) {} + : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {} #define GET_REGINFO_TARGET_DESC #include "NVPTXGenRegisterInfo.inc" /// NVPTX Callee Saved Registers -const uint16_t* NVPTXRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const { +const uint16_t * +NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { static const uint16_t CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } // NVPTX Callee Saved Reg Classes -const TargetRegisterClass* const* +const TargetRegisterClass *const * NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; + static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 }; return CalleeSavedRegClasses; } @@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -void NVPTXRegisterInfo:: -eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const { +void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; @@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, MachineFunction &MF = *MI.getParent()->getParent(); int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum+1).getImm(); + MI.getOperand(FIOperandNum + 1).getImm(); // Using I0 as the frame pointer MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false); - MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } -int NVPTXRegisterInfo:: -getDwarfRegNum(unsigned RegNum, bool isEH) const { +int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return 0; } @@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return NVPTX::VRFrame; } -unsigned NVPTXRegisterInfo::getRARegister() const { - return 0; -} - +unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 69f73f213c..d406820661 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -17,7 +17,6 @@ #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" - #define GET_REGINFO_HEADER #include "NVPTXGenRegisterInfo.inc" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo { private: bool Is64Bit; // Hold Strings that can be free'd all together with NVPTXRegisterInfo - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; public: - NVPTXRegisterInfo(const TargetInstrInfo &tii, - const NVPTXSubtarget &st); - + NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st); //------------------------------------------------------ // Pure virtual functions from TargetRegisterInfo //------------------------------------------------------ // NVPTX callee saved registers - virtual const uint16_t* + virtual const uint16_t * getCalleeSavedRegs(const MachineFunction *MF = 0) const; // NVPTX callee saved register classes - virtual const TargetRegisterClass* const * + virtual const TargetRegisterClass *const * getCalleeSavedRegClasses(const MachineFunction *MF) const; virtual BitVector getReservedRegs(const MachineFunction &MF) const; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, - int SPAdj, unsigned FIOperandNum, - RegScavenger *RS=NULL) const; + virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = NULL) const; virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; virtual unsigned getFrameRegister(const MachineFunction &MF) const; @@ -74,11 +71,9 @@ public: }; - -std::string getNVPTXRegClassName (const TargetRegisterClass *RC); -std::string getNVPTXRegClassStr (const TargetRegisterClass *RC); +std::string getNVPTXRegClassName(const TargetRegisterClass *RC); +std::string getNVPTXRegClassStr(const TargetRegisterClass *RC); } // end namespace llvm - #endif diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp index babe29500d..83dfe12089 100644 --- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp +++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp @@ -21,9 +21,7 @@ using namespace llvm; -namespace llvm { -FunctionPass *createSplitBBatBarPass(); -} +namespace llvm { FunctionPass *createSplitBBatBarPass(); } char NVPTXSplitBBatBar::ID = 0; @@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) { // This interface will most likely not be necessary, because this pass will // not be invoked by the driver, but will be used as a prerequisite to // another pass. -FunctionPass *llvm::createSplitBBatBarPass() { - return new NVPTXSplitBBatBar(); -} +FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 7b62cce2c6..2dcd73dcff 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -22,27 +22,23 @@ using namespace llvm; // Select Driver Interface #include "llvm/Support/CommandLine.h" namespace { -cl::opt<NVPTX::DrvInterface> -DriverInterface(cl::desc("Choose driver interface:"), - cl::values( - clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), - clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), - clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), - clEnumValEnd), - cl::init(NVPTX::NVCL)); +cl::opt<NVPTX::DrvInterface> DriverInterface( + cl::desc("Choose driver interface:"), + cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"), + clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"), + clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd), + cl::init(NVPTX::NVCL)); } NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit) -: NVPTXGenSubtargetInfo(TT, CPU, FS), - Is64Bit(is64Bit), - PTXVersion(0), - SmVersion(10) { + : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), + SmVersion(20) { drvInterface = DriverInterface; // Provide the default CPU if none - std::string defCPU = "sm_10"; + std::string defCPU = "sm_20"; ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index beea77e38d..670077daaa 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -25,7 +25,7 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { - + std::string TargetName; NVPTX::DrvInterface drvInterface; bool Is64Bit; @@ -61,13 +61,10 @@ public: bool hasLDU() const { return SmVersion >= 20; } bool hasGenericLdSt() const { return SmVersion >= 20; } inline bool hasHWROT32() const { return false; } - inline bool hasSWROT32() const { - return true; - } - inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; } + inline bool hasSWROT32() const { return true; } + inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } - bool is64Bit() const { return Is64Bit; } unsigned int getSmVersion() const { return SmVersion; } @@ -96,4 +93,4 @@ public: } // End llvm namespace -#endif // NVPTXSUBTARGET_H +#endif // NVPTXSUBTARGET_H diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index cd765fa8cb..67ca6b58e5 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -45,9 +45,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Scalar.h" - using namespace llvm; +namespace llvm { +void initializeNVVMReflectPass(PassRegistry&); +} extern "C" void LLVMInitializeNVPTXTarget() { // Register the target. @@ -57,52 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() { RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32); RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64); + // FIXME: This pass is really intended to be invoked during IR optimization, + // but it's very NVPTX-specific. + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); } -NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, - StringRef TT, - StringRef CPU, - StringRef FS, - const TargetOptions& Options, - Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64bit) -: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), - DL(Subtarget.getDataLayout()), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit) -/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { -} - - +NVPTXTargetMachine::NVPTXTargetMachine( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()), + InstrInfo(*this), TLInfo(*this), TSInfo(*this), + FrameLowering( + *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {} void NVPTXTargetMachine32::anchor() {} -NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) { -} +NVPTXTargetMachine32::NVPTXTargetMachine32( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} void NVPTXTargetMachine64::anchor() {} -NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) -: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) { -} - +NVPTXTargetMachine64::NVPTXTargetMachine64( + const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} namespace llvm { class NVPTXPassConfig : public TargetPassConfig { public: NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) {} NVPTXTargetMachine &getNVPTXTargetMachine() const { return getTM<NVPTXTargetMachine>(); @@ -126,6 +118,4 @@ bool NVPTXPassConfig::addInstSelector() { return false; } -bool NVPTXPassConfig::addPreRegAlloc() { - return false; -} +bool NVPTXPassConfig::addPreRegAlloc() { return false; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 1a732be1ad..5fbcf735b4 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// - #ifndef NVPTX_TARGETMACHINE_H #define NVPTX_TARGETMACHINE_H @@ -31,42 +30,40 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { - NVPTXSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - NVPTXInstrInfo InstrInfo; - NVPTXTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; + NVPTXSubtarget Subtarget; + const DataLayout DL; // Calculates type size & alignment + NVPTXInstrInfo InstrInfo; + NVPTXTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; // NVPTX does not have any call stack frame, but need a NVPTX specific // FrameLowering class because TargetFrameLowering is abstract. - NVPTXFrameLowering FrameLowering; + NVPTXFrameLowering FrameLowering; // Hold Strings that can be free'd all together with NVPTXTargetMachine - ManagedStringPool ManagedStrPool; + ManagedStringPool ManagedStrPool; //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, // bool DisableVerify, MCContext *&OutCtx); public: - NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OP, - bool is64bit); + NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } - virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const DataLayout *getDataLayout() const { return &DL;} - virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;} + virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const DataLayout *getDataLayout() const { return &DL; } + virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; } virtual const NVPTXRegisterInfo *getRegisterInfo() const { return &(InstrInfo.getRegisterInfo()); } virtual NVPTXTargetLowering *getTargetLowering() const { - return const_cast<NVPTXTargetLowering*>(&TLInfo); + return const_cast<NVPTXTargetLowering *>(&TLInfo); } virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const { @@ -79,22 +76,19 @@ public: //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level); ManagedStringPool *getManagedStrPool() const { - return const_cast<ManagedStringPool*>(&ManagedStrPool); + return const_cast<ManagedStringPool *>(&ManagedStrPool); } virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); // Emission of machine code through JITCodeEmitter is not supported. - virtual bool addPassesToEmitMachineCode(PassManagerBase &, - JITCodeEmitter &, + virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &, bool = true) { return true; } // Emission of machine code through MCJIT is not supported. - virtual bool addPassesToEmitMC(PassManagerBase &, - MCContext *&, - raw_ostream &, + virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &, bool = true) { return true; } @@ -119,7 +113,6 @@ public: CodeGenOpt::Level OL); }; - } // end namespace llvm #endif diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index b5698a2fc0..6ab0e08ad0 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -46,45 +46,43 @@ public: } virtual void Initialize(MCContext &ctx, const TargetMachine &TM) { - TextSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getText()); - DataSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getDataRel()); - BSSSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getBSS()); - ReadOnlySection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getReadOnly()); + TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText()); + DataSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel()); + BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS()); + ReadOnlySection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly()); - StaticCtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - StaticDtorSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - LSDASection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - EHFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfStrSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfLocSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); - DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF, - SectionKind::getMetadata()); + StaticCtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + StaticDtorSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + LSDASection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + EHFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfAbbrevSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfFrameSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfPubTypesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfDebugInlineSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfStrSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLocSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfARangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfRangesSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfMacroInfoSection = + new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } virtual const MCSection *getSectionForConstant(SectionKind Kind) const { @@ -93,8 +91,7 @@ public: virtual const MCSection * getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, - Mangler *Mang, - const TargetMachine &TM) const { + Mangler *Mang, const TargetMachine &TM) const { return DataSection; } diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 1ccc9f7c02..6786eb0224 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t; ManagedStatic<per_module_annot_t> annotationCache; - static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(md && "Invalid mdnode for annotation"); assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands"); @@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) { assert(prop && "Annotation property not a string"); // value - ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1)); + ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1)); assert(Val && "Value operand not a constant int"); std::string keyname = prop->getString().str(); @@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop, bool llvm::isTexture(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE], + annot)) { assert((annot == 1) && "Unexpected annotation on a texture symbol"); return true; } @@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) { bool llvm::isSurface(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE], + annot)) { assert((annot == 1) && "Unexpected annotation on a surface symbol"); return true; } @@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) { bool llvm::isSampler(const llvm::Value &val) { if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) { unsigned annot; - if (llvm::findOneNVVMAnnotation(gv, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findOneNVVMAnnotation( + gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { assert((annot == 1) && "Unexpected annotation on a sampler symbol"); return true; } @@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) { if (const Argument *arg = dyn_cast<Argument>(&val)) { const Function *func = arg->getParent(); std::vector<unsigned> annot; - if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], - annot)) { + if (llvm::findAllNVVMAnnotation( + func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISREADONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) { const Function *func = arg->getParent(); std::vector<unsigned> annot; if (llvm::findAllNVVMAnnotation(func, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], - annot)) { + llvm::PropertyAnnotationNames[ + llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM], + annot)) { if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end()) return true; } @@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) { } bool llvm::getMaxNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x)); } bool llvm::getMaxNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y)); } bool llvm::getMaxNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z)); } bool llvm::getReqNTIDx(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x)); } bool llvm::getReqNTIDy(const Function &F, unsigned &y) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], - y)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y)); } bool llvm::getReqNTIDz(const Function &F, unsigned &z) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], - z)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z)); } bool llvm::getMinCTASm(const Function &F, unsigned &x) { - return (llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], - x)); + return (llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x)); } bool llvm::isKernelFunction(const Function &F) { unsigned x = 0; - bool retval = llvm::findOneNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], - x); + bool retval = llvm::findOneNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x); if (retval == false) { // There is no NVVM metadata, check the calling convention if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel) @@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) { else return false; } - return (x==1); + return (x == 1); } bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { std::vector<unsigned> Vs; - bool retval = llvm::findAllNVVMAnnotation(&F, - llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], - Vs); + bool retval = llvm::findAllNVVMAnnotation( + &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs); if (retval == false) return false; - for (int i=0, e=Vs.size(); i<e; i++) { + for (int i = 0, e = Vs.size(); i < e; i++) { unsigned v = Vs[i]; - if ( (v >> 16) == index ) { - align = v & 0xFFFF; + if ((v >> 16) == index) { + align = v & 0xFFFF; return true; } } @@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) { bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { if (MDNode *alignNode = I.getMetadata("callalign")) { - for (int i=0, n = alignNode->getNumOperands(); - i<n; i++) { + for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) { if (const ConstantInt *CI = - dyn_cast<ConstantInt>(alignNode->getOperand(i))) { + dyn_cast<ConstantInt>(alignNode->getOperand(i))) { unsigned v = CI->getZExtValue(); - if ( (v>>16) == index ) { + if ((v >> 16) == index) { align = v & 0xFFFF; return true; } - if ( (v>>16) > index ) { + if ((v >> 16) > index) { return false; } } @@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) { // consider several special intrinsics in striping pointer casts, and // provide an option to ignore GEP indicies for find out the base address only // which could be used in simple alias disambigurate. -const Value *llvm::skipPointerTransfer(const Value *V, - bool ignore_GEP_indices) { +const Value * +llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) { V = V->stripPointerCasts(); while (true) { if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) { @@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V, // - ignore GEP indicies for find out the base address only, and // - tracking PHINode // which could be used in simple alias disambigurate. -const Value *llvm::skipPointerTransfer(const Value *V, - std::set<const Value *> &processed) { +const Value * +llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) { if (processed.find(V) != processed.end()) return NULL; processed.insert(V); @@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V, return V; } - // The following are some useful utilities for debuggung BasicBlock *llvm::getParentBlock(Value *v) { diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 247e09b8bc..a208004297 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -23,8 +23,7 @@ #include <string> #include <vector> -namespace llvm -{ +namespace llvm { #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly" #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly" @@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID); /// to pass into type construction of CallInst ctors. This turns a null /// terminated list of pointers (or other value types) into a real live vector. /// -template<typename T> -inline std::vector<T> make_vector(T A, ...) { +template <typename T> inline std::vector<T> make_vector(T A, ...) { va_list Args; va_start(Args, A); std::vector<T> Result; @@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) { bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id); const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices); -const Value *skipPointerTransfer(const Value *V, - std::set<const Value *> &processed); +const Value * +skipPointerTransfer(const Value *V, std::set<const Value *> &processed); BasicBlock *getParentBlock(Value *v); Function *getParentFunction(Value *v); void dumpBlock(Value *v, char *blockName); diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp index 6a0e5328f6..5f074b33a2 100644 --- a/lib/Target/NVPTX/NVPTXutil.cpp +++ b/lib/Target/NVPTX/NVPTXutil.cpp @@ -18,8 +18,7 @@ using namespace llvm; namespace llvm { -bool isParamLoad(const MachineInstr *MI) -{ +bool isParamLoad(const MachineInstr *MI) { if ((MI->getOpcode() != NVPTX::LD_i32_avar) && (MI->getOpcode() != NVPTX::LD_i64_avar)) return false; @@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI) return true; } -#define DATA_MASK 0x7f -#define DIGIT_WIDTH 7 -#define MORE_BYTES 0x80 +#define DATA_MASK 0x7f +#define DIGIT_WIDTH 7 +#define MORE_BYTES 0x80 -static int encode_leb128(uint64_t val, int *nbytes, - char *space, int splen) -{ +static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) { char *a; char *end = space + splen; @@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes, #undef DIGIT_WIDTH #undef MORE_BYTES -uint64_t encode_leb128(const char *str) -{ - union { uint64_t x; char a[8]; } temp64; +uint64_t encode_leb128(const char *str) { + union { + uint64_t x; + char a[8]; + } temp64; temp64.x = 0; - for (unsigned i=0,e=strlen(str); i!=e; ++i) - temp64.a[i] = str[e-1-i]; + for (unsigned i = 0, e = strlen(str); i != e; ++i) + temp64.a[i] = str[e - 1 - i]; char encoded[16]; int nbytes; int retval = encode_leb128(temp64.x, &nbytes, encoded, 16); - (void)retval; - assert(retval == 0 && - "Encoding to leb128 failed"); + (void) retval; + assert(retval == 0 && "Encoding to leb128 failed"); assert(nbytes <= 8 && "Cannot support register names with leb128 encoding > 8 bytes"); temp64.x = 0; - for (int i=0; i<nbytes; ++i) + for (int i = 0; i < nbytes; ++i) temp64.a[i] = encoded[i]; return temp64.x; diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp new file mode 100644 index 0000000000..3bbd1a13da --- /dev/null +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -0,0 +1,193 @@ +//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces occurences of __nvvm_reflect("string") with an +// integer based on -nvvm-reflect-list string=<int> option given to this pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" +#include "llvm/Pass.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include <map> +#include <sstream> +#include <string> +#include <vector> + +#define NVVM_REFLECT_FUNCTION "__nvvm_reflect" + +using namespace llvm; + +namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } + +namespace { +class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +private: + //std::map<std::string, int> VarMap; + StringMap<int> VarMap; + typedef std::map<std::string, int>::iterator VarMapIter; + Function *reflectFunction; + +public: + static char ID; + NVVMReflect() : ModulePass(ID) { + VarMap.clear(); + reflectFunction = 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + virtual bool runOnModule(Module &); + + void setVarMap(); +}; +} + +static cl::opt<bool> +NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), + cl::desc("NVVM reflection, enabled by default")); + +char NVVMReflect::ID = 0; +INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", + "Replace occurences of __nvvm_reflect() calls with 0/1", false, + false) + +static cl::list<std::string> +ReflectList("nvvm-reflect-list", cl::value_desc("name=0/1"), + cl::desc("A list of string=num assignments, where num=0 or 1"), + cl::ValueRequired); + +/// This function does the same operation as perl's split. +/// For example, calling this with ("a=1,b=2,c=0", ",") will +/// return ["a=1", "b=2", "c=0"] in the return std::vector. +static std::vector<std::string> +Tokenize(const std::string &str, const std::string &delim) { + std::vector<std::string> tokens; + + size_t p0 = 0, p1 = std::string::npos; + while (p0 != std::string::npos) { + p1 = str.find_first_of(delim, p0); + if (p1 != p0) { + std::string token = str.substr(p0, p1 - p0); + tokens.push_back(token); + } + p0 = str.find_first_not_of(delim, p1); + } + + return tokens; +} + +/// The command line can look as follows : +/// -R a=1,b=2 -R c=3,d=0 -R e=2 +/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the +/// ReflectList vector. First, each of ReflectList[i] is 'split' +/// using "," as the delimiter. Then each of this part is split +/// using "=" as the delimiter. +void NVVMReflect::setVarMap() { + for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { + // DEBUG(dbgs() << "Option : " << ReflectList[i] << std::endl); + std::vector<std::string> nameValList = Tokenize(ReflectList[i], ","); + for (unsigned j = 0, ej = nameValList.size(); j != ej; ++j) { + std::vector<std::string> nameValPair = Tokenize(nameValList[j], "="); + assert(nameValPair.size() == 2 && "name=val expected"); + std::stringstream valstream(nameValPair[1]); + int val; + valstream >> val; + assert((!(valstream.fail())) && "integer value expected"); + VarMap[nameValPair[0]] = val; + } + } +} + +bool NVVMReflect::runOnModule(Module &M) { + if (!NVVMReflectEnabled) + return false; + + setVarMap(); + + reflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); + + // If reflect function is not used, then there will be + // no entry in the module. + if (reflectFunction == 0) { + return false; + } + + // Validate _reflect function + assert(reflectFunction->isDeclaration() && + "_reflect function should not have a body"); + assert(reflectFunction->getReturnType()->isIntegerTy() && + "_reflect's return type should be integer"); + + std::vector<Instruction *> toRemove; + + // Go through the uses of reflectFunction in this Function. + // Each of them should a CallInst with a ConstantArray argument. + // First validate that. If the c-string corresponding to the + // ConstantArray can be found successfully, see if it can be + // found in VarMap. If so, replace the uses of CallInst with the + // value found in VarMap. If not, replace the use with value 0. + for (Value::use_iterator iter = reflectFunction->use_begin(), + iterEnd = reflectFunction->use_end(); + iter != iterEnd; ++iter) { + assert(isa<CallInst>(*iter) && "Only a call instruction can use _reflect"); + CallInst *reflect = cast<CallInst>(*iter); + + assert((reflect->getNumOperands() == 2) && + "Only one operand expect for _reflect function"); + // In cuda, we will have an extra constant-to-generic conversion of + // the string. + const Value *conv = reflect->getArgOperand(0); + assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion"); + const CallInst *convcall = cast<CallInst>(conv); + const Value *str = convcall->getArgOperand(0); + assert(isa<ConstantExpr>(str) && + "Format of _reflect function not recognized"); + const ConstantExpr *gep = cast<ConstantExpr>(str); + + const Value *sym = gep->getOperand(0); + assert(isa<Constant>(sym) && "Format of _reflect function not recognized"); + + const Constant *symstr = cast<Constant>(sym); + + assert(isa<ConstantDataSequential>(symstr->getOperand(0)) && + "Format of _reflect function not recognized"); + + assert(cast<ConstantDataSequential>(symstr->getOperand(0))->isCString() && + "Format of _reflect function not recognized"); + + std::string reflectArg = + cast<ConstantDataSequential>(symstr->getOperand(0))->getAsString(); + + reflectArg = reflectArg.substr(0, reflectArg.size() - 1); + // DEBUG(dbgs() << "Arg of _reflect : " << reflectArg << std::endl); + + int reflectVal = 0; // The default value is 0 + if (VarMap.find(reflectArg) != VarMap.end()) { + reflectVal = VarMap[reflectArg]; + } + reflect->replaceAllUsesWith( + ConstantInt::get(reflect->getType(), reflectVal)); + toRemove.push_back(reflect); + } + if (toRemove.size() == 0) + return false; + + for (unsigned i = 0, e = toRemove.size(); i != e; ++i) + toRemove[i]->eraseFromParent(); + return true; +} diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp index 6c801b875e..cc7d4dc5ec 100644 --- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp +++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp @@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64; extern "C" void LLVMInitializeNVPTXTargetInfo() { RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx", - "NVIDIA PTX 32-bit"); + "NVIDIA PTX 32-bit"); RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64", - "NVIDIA PTX 64-bit"); + "NVIDIA PTX 64-bit"); } diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h index a7347efd78..45cc0b8b67 100644 --- a/lib/Target/NVPTX/cl_common_defines.h +++ b/lib/Target/NVPTX/cl_common_defines.h @@ -24,22 +24,21 @@ enum { CLK_LUMINANCE = 0x10B9 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) - , + , CLK_Rx = 0x10BA, CLK_RGx = 0x10BB, CLK_RGBx = 0x10BC #endif }; - typedef enum clk_channel_type { // valid formats for float return types - CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 - CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 - CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 - CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 - CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half - CLK_FLOAT = 0x10DE, // four channel RGBA float + CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8 + CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16 + CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8 + CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16 + CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half + CLK_FLOAT = 0x10DE, // four channel RGBA float #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) CLK_UNORM_SHORT_565 = 0x10D4, @@ -48,7 +47,7 @@ typedef enum clk_channel_type { #endif // valid only for integer return types - CLK_SIGNED_INT8 = 0x10D7, + CLK_SIGNED_INT8 = 0x10D7, CLK_SIGNED_INT16 = 0x10D8, CLK_SIGNED_INT32 = 0x10D9, CLK_UNSIGNED_INT8 = 0x10DA, @@ -56,70 +55,68 @@ typedef enum clk_channel_type { CLK_UNSIGNED_INT32 = 0x10DC, // CI SPI for CPU - __CLK_UNORM_INT8888 , // four channel ARGB unorm8 - __CLK_UNORM_INT8888R, // four channel BGRA unorm8 + __CLK_UNORM_INT8888, // four channel ARGB unorm8 + __CLK_UNORM_INT8888R, // four channel BGRA unorm8 __CLK_VALID_IMAGE_TYPE_COUNT, __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT, - __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to - // represent any image type - __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1 -}clk_channel_type; + __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to + // represent any image type + __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1 +} clk_channel_type; typedef enum clk_sampler_type { - __CLK_ADDRESS_BASE = 0, - CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, - CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, + __CLK_ADDRESS_BASE = 0, + CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE, + CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE, #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1) - CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, + CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR, #endif - __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | - CLK_ADDRESS_CLAMP_TO_EDGE | - CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, - __CLK_ADDRESS_BITS = 3, // number of bits required to - // represent address info - - __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, - CLK_NORMALIZED_COORDS_FALSE = 0, - CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, - __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE | - CLK_NORMALIZED_COORDS_TRUE, - __CLK_NORMALIZED_BITS = 1, // number of bits required to - // represent normalization - - __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + - __CLK_NORMALIZED_BITS, - CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, - CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, - CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, - __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | - CLK_FILTER_ANISOTROPIC, - __CLK_FILTER_BITS = 2, // number of bits required to - // represent address info - - __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, - CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, - CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, - CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, - __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | - CLK_MIP_ANISOTROPIC, - __CLK_MIP_BITS = 2, - - __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, - __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | - __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, - - __CLK_ANISOTROPIC_RATIO_BITS = 5, - __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >> - (__CLK_ANISOTROPIC_RATIO_BITS-1) + __CLK_ADDRESS_MASK = + CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR, + __CLK_ADDRESS_BITS = 3, // number of bits required to + // represent address info + + __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS, + CLK_NORMALIZED_COORDS_FALSE = 0, + CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE, + __CLK_NORMALIZED_MASK = + CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE, + __CLK_NORMALIZED_BITS = 1, // number of bits required to + // represent normalization + + __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS, + CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE, + CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE, + CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE, + __CLK_FILTER_MASK = + CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC, + __CLK_FILTER_BITS = 2, // number of bits required to + // represent address info + + __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS, + CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE, + CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE, + CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE, + __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC, + __CLK_MIP_BITS = 2, + + __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS, + __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK | + __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK, + + __CLK_ANISOTROPIC_RATIO_BITS = 5, + __CLK_ANISOTROPIC_RATIO_MASK = + (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1) } clk_sampler_type; // Memory synchronization -#define CLK_LOCAL_MEM_FENCE (1 << 0) -#define CLK_GLOBAL_MEM_FENCE (1 << 1) +#define CLK_LOCAL_MEM_FENCE (1 << 0) +#define CLK_GLOBAL_MEM_FENCE (1 << 1) #endif // __CL_COMMON_DEFINES_H__ diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index 3d583060d1..bacc108c62 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -13,7 +13,7 @@ #define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" -#include "MCTargetDesc/PPCBaseInfo.h" +#include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" @@ -87,35 +87,9 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { unsigned Code = MI->getOperand(OpNo).getImm(); - if (!Modifier) { - unsigned CCReg = MI->getOperand(OpNo+1).getReg(); - unsigned RegNo; - switch (CCReg) { - default: llvm_unreachable("Unknown CR register"); - case PPC::CR0: RegNo = 0; break; - case PPC::CR1: RegNo = 1; break; - case PPC::CR2: RegNo = 2; break; - case PPC::CR3: RegNo = 3; break; - case PPC::CR4: RegNo = 4; break; - case PPC::CR5: RegNo = 5; break; - case PPC::CR6: RegNo = 6; break; - case PPC::CR7: RegNo = 7; break; - } - - // Print the CR bit number. The Code is ((BI << 5) | BO) for a - // BCC, but we must have the positive form here (BO == 12) - unsigned BI = Code >> 5; - assert((Code & 0xF) == 12 && - "BO in predicate bit must have the positive form"); - - unsigned Value = 4*RegNo + BI; - O << Value; - return; - } if (StringRef(Modifier) == "cc") { switch ((PPC::Predicate)Code) { - case PPC::PRED_ALWAYS: return; // Don't print anything for always. case PPC::PRED_LT: O << "lt"; return; case PPC::PRED_LE: O << "le"; return; case PPC::PRED_EQ: O << "eq"; return; @@ -129,8 +103,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, assert(StringRef(Modifier) == "reg" && "Need to specify 'cc' or 'reg' as predicate op modifier!"); - // Don't print the register for 'always'. - if (Code == PPC::PRED_ALWAYS) return; printOperand(MI, OpNo+1, O); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index f24edf62ed..ec2657403e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -30,13 +30,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case FK_Data_2: case FK_Data_4: case FK_Data_8: - case PPC::fixup_ppc_toc: case PPC::fixup_ppc_tlsreg: case PPC::fixup_ppc_nofixup: return Value; - case PPC::fixup_ppc_lo14: - case PPC::fixup_ppc_toc16_ds: - return (Value & 0xffff) << 2; case PPC::fixup_ppc_brcond14: return Value & 0xfffc; case PPC::fixup_ppc_br24: @@ -48,8 +44,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_ha16: return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff; case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_toc16: return Value & 0xffff; + case PPC::fixup_ppc_lo16_ds: + return Value & 0xfffc; } } @@ -82,10 +79,7 @@ public: { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_ppc_lo16", 16, 16, 0 }, { "fixup_ppc_ha16", 16, 16, 0 }, - { "fixup_ppc_lo14", 16, 14, 0 }, - { "fixup_ppc_toc", 0, 64, 0 }, - { "fixup_ppc_toc16", 16, 16, 0 }, - { "fixup_ppc_toc16_ds", 16, 14, 0 }, + { "fixup_ppc_lo16_ds", 16, 14, 0 }, { "fixup_ppc_tlsreg", 0, 0, 0 }, { "fixup_ppc_nofixup", 0, 0, 0 } }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h deleted file mode 100644 index 9c975c089e..0000000000 --- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h +++ /dev/null @@ -1,70 +0,0 @@ -//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the PPC target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. -// -//===----------------------------------------------------------------------===// - -#ifndef PPCBASEINFO_H -#define PPCBASEINFO_H - -#include "PPCMCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -/// getPPCRegisterNumbering - Given the enum value for some register, e.g. -/// PPC::F14, return the number that it corresponds to (e.g. 14). -inline static unsigned getPPCRegisterNumbering(unsigned RegEnum) { - using namespace PPC; - switch (RegEnum) { - case 0: return 0; - case R0 : case X0 : case F0 : case V0 : case CR0: case CR0LT: return 0; - case R1 : case X1 : case F1 : case V1 : case CR1: case CR0GT: return 1; - case R2 : case X2 : case F2 : case V2 : case CR2: case CR0EQ: return 2; - case R3 : case X3 : case F3 : case V3 : case CR3: case CR0UN: return 3; - case R4 : case X4 : case F4 : case V4 : case CR4: case CR1LT: return 4; - case R5 : case X5 : case F5 : case V5 : case CR5: case CR1GT: return 5; - case R6 : case X6 : case F6 : case V6 : case CR6: case CR1EQ: return 6; - case R7 : case X7 : case F7 : case V7 : case CR7: case CR1UN: return 7; - case R8 : case X8 : case F8 : case V8 : case CR2LT: return 8; - case R9 : case X9 : case F9 : case V9 : case CR2GT: return 9; - case R10: case X10: case F10: case V10: case CR2EQ: return 10; - case R11: case X11: case F11: case V11: case CR2UN: return 11; - case R12: case X12: case F12: case V12: case CR3LT: return 12; - case R13: case X13: case F13: case V13: case CR3GT: return 13; - case R14: case X14: case F14: case V14: case CR3EQ: return 14; - case R15: case X15: case F15: case V15: case CR3UN: return 15; - case R16: case X16: case F16: case V16: case CR4LT: return 16; - case R17: case X17: case F17: case V17: case CR4GT: return 17; - case R18: case X18: case F18: case V18: case CR4EQ: return 18; - case R19: case X19: case F19: case V19: case CR4UN: return 19; - case R20: case X20: case F20: case V20: case CR5LT: return 20; - case R21: case X21: case F21: case V21: case CR5GT: return 21; - case R22: case X22: case F22: case V22: case CR5EQ: return 22; - case R23: case X23: case F23: case V23: case CR5UN: return 23; - case R24: case X24: case F24: case V24: case CR6LT: return 24; - case R25: case X25: case F25: case V25: case CR6GT: return 25; - case R26: case X26: case F26: case V26: case CR6EQ: return 26; - case R27: case X27: case F27: case V27: case CR6UN: return 27; - case R28: case X28: case F28: case V28: case CR7LT: return 28; - case R29: case X29: case F29: case V29: case CR7GT: return 29; - case R30: case X30: case F30: case V30: case CR7EQ: return 30; - case R31: case X31: case F31: case V31: case CR7UN: return 31; - default: - llvm_unreachable("Unhandled reg in PPCRegisterInfo::getRegisterNumbering!"); - } -} - -} // end namespace llvm; - -#endif diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 61868d446f..84e4175e63 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -133,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC_ADDR16_LO; break; + case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: + Type = ELF::R_PPC64_TOC16; + break; case MCSymbolRefExpr::VK_PPC_TOC16_LO: Type = ELF::R_PPC64_TOC16_LO; break; @@ -144,35 +147,12 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, break; } break; - case PPC::fixup_ppc_lo14: - Type = ELF::R_PPC_ADDR14; - break; - case PPC::fixup_ppc_toc: - Type = ELF::R_PPC64_TOC; - break; - case PPC::fixup_ppc_toc16: + case PPC::fixup_ppc_lo16_ds: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_PPC_TPREL16_LO: - Type = ELF::R_PPC64_TPREL16_LO; - break; - case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: - Type = ELF::R_PPC64_DTPREL16_LO; - break; case MCSymbolRefExpr::VK_None: - Type = ELF::R_PPC64_TOC16; - break; - case MCSymbolRefExpr::VK_PPC_TOC16_LO: - Type = ELF::R_PPC64_TOC16_LO; - break; - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: - Type = ELF::R_PPC64_GOT_TLSLD16_LO; + Type = ELF::R_PPC64_ADDR16_DS; break; - } - break; - case PPC::fixup_ppc_toc16_ds: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_PPC_TOC_ENTRY: Type = ELF::R_PPC64_TOC16_DS; break; @@ -253,8 +233,7 @@ adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { switch ((unsigned)Fixup.getKind()) { case PPC::fixup_ppc_ha16: case PPC::fixup_ppc_lo16: - case PPC::fixup_ppc_toc16: - case PPC::fixup_ppc_toc16_ds: + case PPC::fixup_ppc_lo16_ds: RelocOffset += 2; break; default: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 7917f7736e..86c44f57a5 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -12,6 +12,8 @@ #include "llvm/MC/MCFixup.h" +#undef PPC + namespace llvm { namespace PPC { enum Fixups { @@ -31,19 +33,9 @@ enum Fixups { /// like 'lis'. fixup_ppc_ha16, - /// fixup_ppc_lo14 - A 14-bit fixup corresponding to lo16(_foo) for instrs - /// like 'std'. - fixup_ppc_lo14, - - /// fixup_ppc_toc - Insert value of TOC base (.TOC.). - fixup_ppc_toc, - - /// fixup_ppc_toc16 - A 16-bit signed fixup relative to the TOC base. - fixup_ppc_toc16, - - /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with - /// implied 2 zero bits - fixup_ppc_toc16_ds, + /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with + /// implied 2 zero bits for instrs like 'std'. + fixup_ppc_lo16_ds, /// fixup_ppc_tlsreg - Insert thread-pointer register number. fixup_ppc_tlsreg, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index d048426d43..2223cd623c 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -13,10 +13,10 @@ #define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/PPCMCTargetDesc.h" -#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -33,24 +33,17 @@ class PPCMCCodeEmitter : public MCCodeEmitter { void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCSubtargetInfo &STI; + const MCContext &CTX; Triple TT; public: PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, MCContext &ctx) - : STI(sti), TT(STI.getTargetTriple()) { + : STI(sti), CTX(ctx), TT(STI.getTargetTriple()) { } ~PPCMCCodeEmitter() {} - bool is64BitMode() const { - return (STI.getFeatureBits() & PPC::Feature64Bit) != 0; - } - - bool isSVR4ABI() const { - return TT.isMacOSX() == 0; - } - unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const; unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo, @@ -81,12 +74,11 @@ public: SmallVectorImpl<MCFixup> &Fixups) const { uint64_t Bits = getBinaryCodeForInstr(MI, Fixups); - // BL8_NOP_ELF, BLA8_NOP_ELF, etc., all have a size of 8 because of the - // following 'nop'. + // BL8_NOP etc. all have a size of 8 because of the following 'nop'. unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value! unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF || Opcode == PPC::BLA8_NOP_ELF || - Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) + if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP || + Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) Size = 8; // Output the constant in big endian byte order. @@ -121,11 +113,11 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo, (MCFixupKind)PPC::fixup_ppc_br24)); // For special TLS calls, add another fixup for the symbol. Apparently - // BL8_NOP_ELF, BL8_NOP_ELF_TLSGD, and BL8_NOP_ELF_TLSLD are sufficiently + // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently // similar that TblGen will not generate a separate case for the latter // two, so this is the only way to get the extra fixup generated. unsigned Opcode = MI.getOpcode(); - if (Opcode == PPC::BL8_NOP_ELF_TLSGD || Opcode == PPC::BL8_NOP_ELF_TLSLD) { + if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) { const MCOperand &MO2 = MI.getOperand(OpNo+1); Fixups.push_back(MCFixup::Create(0, MO2.getExpr(), (MCFixupKind)PPC::fixup_ppc_nofixup)); @@ -178,12 +170,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo, return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits; // Add a fixup for the displacement field. - if (isSVR4ABI() && is64BitMode()) - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16)); - else - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo16)); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16)); return RegBits; } @@ -199,13 +187,9 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo, if (MO.isImm()) return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits; - // Add a fixup for the branch target. - if (isSVR4ABI() && is64BitMode()) - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_toc16_ds)); - else - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_lo14)); + // Add a fixup for the displacement field. + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_lo16_ds)); return RegBits; } @@ -220,7 +204,7 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo, // Return the thread-pointer register's encoding. Fixups.push_back(MCFixup::Create(0, MO.getExpr(), (MCFixupKind)PPC::fixup_ppc_tlsreg)); - return getPPCRegisterNumbering(PPC::X13); + return CTX.getRegisterInfo().getEncodingValue(PPC::X13); } unsigned PPCMCCodeEmitter:: @@ -231,7 +215,7 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo, MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MTCRF8) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> getPPCRegisterNumbering(MO.getReg()); + return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } @@ -243,7 +227,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, // The GPR operand should come through here though. assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return getPPCRegisterNumbering(MO.getReg()); + return CTX.getRegisterInfo().getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h index 4a420929d0..38a7420d97 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h @@ -47,6 +47,10 @@ MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS, uint8_t OSABI); } // End llvm namespace +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + // Defines symbolic names for PowerPC registers. This defines a mapping from // register name to register number. // diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp index 12bb0a1434..d84eb9c6aa 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp @@ -18,7 +18,6 @@ using namespace llvm; PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { switch (Opcode) { - default: llvm_unreachable("Unknown PPC branch opcode!"); case PPC::PRED_EQ: return PPC::PRED_NE; case PPC::PRED_NE: return PPC::PRED_EQ; case PPC::PRED_LT: return PPC::PRED_GE; @@ -28,4 +27,5 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) { case PPC::PRED_NU: return PPC::PRED_UN; case PPC::PRED_UN: return PPC::PRED_NU; } + llvm_unreachable("Unknown PPC branch opcode!"); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 972e13852e..ad2b018128 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -17,11 +17,14 @@ // GCC #defines PPC on Linux but we use it as our namespace name #undef PPC +// Generated files will use "namespace PPC". To avoid symbol clash, +// undefine PPC here. PPC may be predefined on some hosts. +#undef PPC + namespace llvm { namespace PPC { /// Predicate - These are "(BI << 5) | BO" for various predicates. enum Predicate { - PRED_ALWAYS = (0 << 5) | 20, PRED_LT = (0 << 5) | 12, PRED_LE = (1 << 5) | 4, PRED_EQ = (2 << 5) | 12, diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index f71979f245..446b6854fb 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -15,7 +15,6 @@ #ifndef LLVM_TARGET_POWERPC_H #define LLVM_TARGET_POWERPC_H -#include "MCTargetDesc/PPCBaseInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include <string> diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 992913602a..a1ea2297bf 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -59,8 +59,18 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", "Enable the fsqrt instruction">; def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true", "Enable the stfiwx instruction">; +def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true", + "Enable the lfiwax instruction">; +def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true", + "Enable the fri[mnpz] instructions">; +def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true", + "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">; def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true", "Enable the isel instruction">; +def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true", + "Enable the popcnt[dw] instructions">; +def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true", + "Enable the ldbrx instruction">; def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true", "Enable Book E instructions">; def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", @@ -71,15 +81,9 @@ def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true", // // CMPB p6, p6x, p7 cmpb // DFP p6, p6x, p7 decimal floating-point instructions -// FLT_CVT p7 fcfids, fcfidu, fcfidus, fcfiduz, fctiwuz -// FPRND p5x, p6, p6x, p7 frim, frin, frip, friz // FRE p5 through p7 fre (vs. fres, available since p3) // FRSQRTES p5 through p7 frsqrtes (vs. frsqrte, available since p3) -// LDBRX p7 load with byte reversal -// LFIWAX p6, p6x, p7 lfiwax -// LFIWZX p7 lfiwzx // POPCNTB p5 through p7 popcntb and related instructions -// POPCNTD p7 popcntd and related instructions // RECIP_PREC p6, p6x, p7 higher precision reciprocal estimates // VSX p7 vector-scalar instruction set @@ -128,16 +132,18 @@ def : ProcessorModel<"e500mc", PPCE500mcModel, def : ProcessorModel<"e5500", PPCE5500Model, [DirectiveE5500, FeatureMFOCRF, Feature64Bit, FeatureSTFIWX, FeatureBookE, FeatureISEL]>; -def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - Feature64Bit - /*, Feature64BitRegs */]>; -def : Processor<"a2q", PPCA2Itineraries, [DirectiveA2, FeatureBookE, - FeatureMFOCRF, FeatureFSqrt, - FeatureSTFIWX, FeatureISEL, - Feature64Bit /*, Feature64BitRegs */, - FeatureQPX]>; +def : Processor<"a2", PPCA2Itineraries, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */]>; +def : Processor<"a2q", PPCA2Itineraries, + [DirectiveA2, FeatureBookE, FeatureMFOCRF, + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, Feature64Bit + /*, Feature64BitRegs */, FeatureQPX]>; def : Processor<"pwr3", G5Itineraries, [DirectivePwr3, FeatureAltivec, FeatureMFOCRF, FeatureSTFIWX, Feature64Bit]>; @@ -149,18 +155,23 @@ def : Processor<"pwr5", G5Itineraries, FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; def : Processor<"pwr5x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureFPRND, + Feature64Bit]>; def : Processor<"pwr6", G5Itineraries, [DirectivePwr6, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - Feature64Bit /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, Feature64Bit + /*, Feature64BitRegs */]>; def : Processor<"pwr6x", G5Itineraries, [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF, - FeatureFSqrt, FeatureSTFIWX, Feature64Bit]>; + FeatureFSqrt, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, Feature64Bit]>; def : Processor<"pwr7", G5Itineraries, [DirectivePwr7, FeatureAltivec, FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX, - FeatureISEL, Feature64Bit /*, Feature64BitRegs */]>; + FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, + FeatureISEL, FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : Processor<"ppc64", G5Itineraries, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index eae9b7b7fb..74cc1bb762 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -370,7 +370,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // Emit the 'bl'. - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL_Darwin) // Darwin vs SVR4 doesn't matter here. + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL) // FIXME: We would like an efficient form for this, so we don't have to do // a lot of extra uniquing. .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); @@ -458,11 +458,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = LDtocL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to LDrs, which is a form of LD with the offset - // specified by a SymbolLo. If the global address is external, has + // Change the opcode to LD. If the global address is external, has // common linkage, or is a jump table address, then reference the // associated TOC entry. Otherwise reference the symbol directly. - TmpInst.setOpcode(PPC::LDrs); + TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && "Invalid operand for LDtocL!"); @@ -496,10 +495,10 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = ADDItocL %Xs, <ga:@sym> LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to ADDI8L. If the global address is external, then + // Change the opcode to ADDI8. If the global address is external, then // generate a TOC entry and reference that. Otherwise reference the // symbol directly. - TmpInst.setOpcode(PPC::ADDI8L); + TmpInst.setOpcode(PPC::ADDI8); const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); MCSymbol *MOSymbol = 0; @@ -548,9 +547,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to LDrs, which is a form of LD with the offset - // specified by a SymbolLo. - TmpInst.setOpcode(PPC::LDrs); + // Change the opcode to LD. + TmpInst.setOpcode(PPC::LD); const MachineOperand &MO = MI->getOperand(1); const GlobalValue *GValue = MO.getGlobal(); MCSymbol *MOSymbol = Mang->getSymbol(GValue); @@ -579,7 +577,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItlsgdL: { // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym> - // Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l + // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -587,7 +585,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsGD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsGD)); @@ -595,7 +593,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::GETtlsADDR: { // Transform: %X3 = GETtlsADDR %X3, <ga:@sym> - // Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd) + // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd) assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); StringRef Name = "__tls_get_addr"; @@ -608,7 +606,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -631,7 +629,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDItlsldL: { // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym> - // Into: %Xd = ADDI8L %Xs, sym@got@tlsld@l + // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -639,7 +637,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymGotTlsLD = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymGotTlsLD)); @@ -647,7 +645,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::GETtlsldADDR: { // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym> - // Into: BL8_NOP_ELF_TLSLD __tls_get_addr(sym@tlsld) + // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld) assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); StringRef Name = "__tls_get_addr"; @@ -660,7 +658,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSLD) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD) .addExpr(TlsRef) .addExpr(SymVar)); return; @@ -683,7 +681,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case PPC::ADDIdtprelL: { // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym> - // Into: %Xd = ADDI8L %Xs, sym@dtprel@l + // Into: %Xd = ADDI8 %Xs, sym@dtprel@l assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC"); const MachineOperand &MO = MI->getOperand(2); const GlobalValue *GValue = MO.getGlobal(); @@ -691,7 +689,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *SymDtprel = MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO, OutContext); - OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L) + OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(SymDtprel)); @@ -911,18 +909,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer.EmitLabel(Stub); OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); + const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext); + // mflr r0 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0)); - // FIXME: MCize this. - OutStreamer.EmitRawText("\tbcl 20, 31, " + Twine(AnonSymbol->getName())); + // bcl 20, 31, AnonSymbol + OutStreamer.EmitInstruction(MCInstBuilder(PPC::BCL).addExpr(Anon)); OutStreamer.EmitLabel(AnonSymbol); // mflr r11 OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11)); // addis r11, r11, ha16(LazyPtr - AnonSymbol) const MCExpr *Sub = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext), - MCSymbolRefExpr::Create(AnonSymbol, OutContext), - OutContext); + Anon, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS) .addReg(PPC::R11) .addReg(PPC::R11) diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index b98cc489f6..81a54d7015 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -189,12 +189,23 @@ INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", /// isCompareEquals - Returns true if the instruction is a compare equals /// instruction with an immediate operand. -static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp) { - if (MI->getOpcode() == PPC::CMPWI || MI->getOpcode() == PPC::CMPDI) { +static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp, + bool &Int64Cmp) { + if (MI->getOpcode() == PPC::CMPWI) { SignedCmp = true; + Int64Cmp = false; return true; - } else if (MI->getOpcode() == PPC::CMPLWI || MI->getOpcode() == PPC::CMPLDI) { + } else if (MI->getOpcode() == PPC::CMPDI) { + SignedCmp = true; + Int64Cmp = true; + return true; + } else if (MI->getOpcode() == PPC::CMPLWI) { + SignedCmp = false; + Int64Cmp = false; + return true; + } else if (MI->getOpcode() == PPC::CMPLDI) { SignedCmp = false; + Int64Cmp = true; return true; } @@ -353,9 +364,9 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end(); RI != RE; ++RI) { IV_Opnd = &RI.getOperand(); - bool SignedCmp; + bool SignedCmp, Int64Cmp; MachineInstr *MI = IV_Opnd->getParent(); - if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp) && + if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) && MI->getOperand(0).getReg() == PredReg) { OldInsts.push_back(MI); @@ -380,14 +391,14 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, assert(InitialValue->isReg() && "Expecting register for init value"); unsigned InitialValueReg = InitialValue->getReg(); - const MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); + MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg); // Here we need to look for an immediate load (an li or lis/ori pair). if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 || DefInstr->getOpcode() == PPC::ORI)) { - int64_t start = (short) DefInstr->getOperand(2).getImm(); - const MachineInstr *DefInstr2 = - MRI->getVRegDef(DefInstr->getOperand(0).getReg()); + int64_t start = DefInstr->getOperand(2).getImm(); + MachineInstr *DefInstr2 = + MRI->getVRegDef(DefInstr->getOperand(1).getReg()); if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 || DefInstr2->getOpcode() == PPC::LIS)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); @@ -399,17 +410,33 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + OldInsts.push_back(DefInstr2); + + // count/iv_value, the trip count, should be positive here. If it + // is negative, that indicates that the counter will wrap. + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 || DefInstr->getOpcode() == PPC::LI)) { DEBUG(dbgs() << " initial constant: " << *DefInstr); - int64_t count = ImmVal - int64_t(short(DefInstr->getOperand(1).getImm())); + int64_t count = ImmVal - + int64_t(short(DefInstr->getOperand(1).getImm())); if ((count % iv_value) != 0) { return 0; } - return new CountValue(count/iv_value); + + OldInsts.push_back(DefInstr); + + if (Int64Cmp) + return new CountValue(count/iv_value); + else + return new CountValue(uint32_t(count/iv_value)); } else if (iv_value == 1 || iv_value == -1) { // We can't determine a constant starting value. if (ImmVal == 0) { @@ -417,8 +444,8 @@ CountValue *PPCCTRLoops::getTripCount(MachineLoop *L, } // FIXME: handle non-zero end value. } - // FIXME: handle non-unit increments (we might not want to introduce division - // but we can handle some 2^n cases with shifts). + // FIXME: handle non-unit increments (we might not want to introduce + // division but we can handle some 2^n cases with shifts). } } @@ -489,9 +516,10 @@ bool PPCCTRLoops::isDead(const MachineInstr *MI, if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (!MRI->use_nodbg_empty(Reg)) { - // This instruction has users, but if the only user is the phi node for the - // parent block, and the only use of that phi node is this instruction, then - // this instruction is dead: both it (and the phi node) can be removed. + // This instruction has users, but if the only user is the phi node for + // the parent block, and the only use of that phi node is this + // instruction, then this instruction is dead: both it (and the phi + // node) can be removed. MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg); if (llvm::next(I) == MRI->use_end() && I.getOperand().getParent()->isPHI()) { @@ -594,6 +622,16 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { DEBUG(dbgs() << "failed to get trip count!\n"); return false; } + + if (TripCount->isImm()) { + DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n"); + + // FIXME: We currently can't form 64-bit constants + // (including 32-bit unsigned constants) + if (!isInt<32>(TripCount->getImm())) + return false; + } + // Does the loop contain any invalid instructions? if (containsInvalidInstruction(L)) { return false; @@ -647,7 +685,7 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { const TargetRegisterClass *SrcRC = MF->getRegInfo().getRegClass(TripCount->getReg()); CountReg = MF->getRegInfo().createVirtualRegister(RC); - unsigned CopyOp = (isPPC64 && SrcRC == GPRC) ? + unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ? (unsigned) PPC::EXTSW_32_64 : (unsigned) TargetOpcode::COPY; BuildMI(*Preheader, InsertPos, dl, @@ -664,13 +702,14 @@ bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) { // Put the trip count in a register for transfer into the count register. int64_t CountImm = TripCount->getImm(); - assert(!TripCount->isNeg() && "Constant trip count must be positive"); + if (TripCount->isNeg()) + CountImm = -CountImm; CountReg = MF->getRegInfo().createVirtualRegister(RC); - if (CountImm > 0xFFFF) { + if (abs64(CountImm) > 0x7FFF) { BuildMI(*Preheader, InsertPos, dl, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), - CountReg).addImm(CountImm >> 16); + CountReg).addImm((CountImm >> 16) & 0xFFFF); unsigned CountReg1 = CountReg; CountReg = MF->getRegInfo().createVirtualRegister(RC); BuildMI(*Preheader, InsertPos, dl, diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td index caeb1796f7..c8a29a3d2c 100644 --- a/lib/Target/PowerPC/PPCCallingConv.td +++ b/lib/Target/PowerPC/PPCCallingConv.td @@ -136,3 +136,9 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAV F27, F28, F29, F30, F31, CR2, CR3, CR4, V20, V21, V22, V23, V24, V25, V26, V27, V28, V29, V30, V31)>; + +def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>; +def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>; + +def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>; + diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index d68bfd12e4..6478718513 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -142,7 +142,7 @@ unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI, assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 || MI.getOpcode() == PPC::MFOCRF) && (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7)); - return 0x80 >> getPPCRegisterNumbering(MO.getReg()); + return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO, @@ -260,7 +260,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 && MI.getOpcode() != PPC::MFOCRF) || MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7); - return getPPCRegisterNumbering(MO.getReg()); + return TM.getRegisterInfo()->getEncodingValue(MO.getReg()); } assert(MO.isImm() && diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 0a396e6693..3244b904ee 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -103,6 +103,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // transform this into the appropriate ORI instruction. static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); unsigned UsedRegMask = 0; @@ -115,7 +116,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineRegisterInfo::livein_iterator I = MF->getRegInfo().livein_begin(), E = MF->getRegInfo().livein_end(); I != E; ++I) { - unsigned RegNo = getPPCRegisterNumbering(I->first); + unsigned RegNo = TRI->getEncodingValue(I->first); if (VRRegNo[RegNo] == I->first) // If this really is a vector reg. UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. } @@ -131,7 +132,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const MachineOperand &MO = Ret.getOperand(I); if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) continue; - unsigned RegNo = getPPCRegisterNumbering(MO.getReg()); + unsigned RegNo = TRI->getEncodingValue(MO.getReg()); UsedRegMask &= ~(1 << (31-RegNo)); } } @@ -188,13 +189,31 @@ static bool spillsCR(const MachineFunction &MF) { return FuncInfo->isCRSpilled(); } +static bool spillsVRSAVE(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->isVRSAVESpilled(); +} + +static bool hasSpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasSpills(); +} + +static bool hasNonRISpills(const MachineFunction &MF) { + const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + return FuncInfo->hasNonRISpills(); +} + /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. -void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { +unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, + bool UpdateMF, + bool UseEstimate) const { MachineFrameInfo *MFI = MF.getFrameInfo(); // Get the number of bytes to allocate from the FrameInfo - unsigned FrameSize = MFI->getStackSize(); + unsigned FrameSize = + UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize(); // Get the alignments provided by the target, and the maximum alignment // (if any) of the fixed frame objects. @@ -223,8 +242,9 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { && spillsCR(MF)) && (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment. // No need for frame - MFI->setStackSize(0); - return; + if (UpdateMF) + MFI->setStackSize(0); + return 0; } // Get the maximum call frame size of all the calls. @@ -241,7 +261,8 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; // Update maximum call frame size. - MFI->setMaxCallFrameSize(maxCallFrameSize); + if (UpdateMF) + MFI->setMaxCallFrameSize(maxCallFrameSize); // Include call frame size in total. FrameSize += maxCallFrameSize; @@ -250,7 +271,10 @@ void PPCFrameLowering::determineFrameLayout(MachineFunction &MF) const { FrameSize = (FrameSize + AlignMask) & ~AlignMask; // Update frame info. - MFI->setStackSize(FrameSize); + if (UpdateMF) + MFI->setStackSize(FrameSize); + + return FrameSize; } // hasFP - Return true if the specified function actually has a dedicated frame @@ -281,6 +305,31 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } +void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { + bool is31 = needsFP(MF); + unsigned FPReg = is31 ? PPC::R31 : PPC::R1; + unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) + for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { + --MBBI; + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + MachineOperand &MO = MBBI->getOperand(I); + if (!MO.isReg()) + continue; + + switch (MO.getReg()) { + case PPC::FP: + MO.setReg(FPReg); + break; + case PPC::FP8: + MO.setReg(FP8Reg); + break; + } + } + } +} void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB @@ -311,13 +360,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { MBBI = MBB.begin(); // Work out frame sizes. - // FIXME: determineFrameLayout() may change the frame size. This should be - // moved upper, to some hook. - determineFrameLayout(MF); - unsigned FrameSize = MFI->getStackSize(); - + unsigned FrameSize = determineFrameLayout(MF); int NegFrameSize = -FrameSize; + if (MFI->isFrameAddressTaken()) + replaceFPWithRealFP(MF); + // Get processor type. bool isPPC64 = Subtarget.isPPC64(); // Get operating system @@ -780,7 +828,7 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { void PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *) const { const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); // Save and clear the LR state. @@ -822,30 +870,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true); FI->setCRSpillFrameIndex(FrameIdx); } - - // Reserve a slot closest to SP or frame pointer if we have a dynalloc or - // a large stack, which will require scavenging a register to materialize a - // large offset. - // FIXME: this doesn't actually check stack size, so is a bit pessimistic - // FIXME: doesn't detect whether or not we need to spill vXX, which requires - // r0 for now. - - if (RegInfo->requiresRegisterScavenging(MF)) - if (needsFP(MF) || spillsCR(MF)) { - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC; - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } } -void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) - const { +void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const { // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) + if (!Subtarget.isSVR4ABI()) { + addScavengingSpillSlot(MF, RS); return; + } // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -853,6 +886,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) // Early exit if no callee saved registers are modified! if (CSI.empty() && !needsFP(MF)) { + addScavengingSpillSlot(MF, RS); return; } @@ -917,6 +951,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); int64_t LowerBound = 0; @@ -936,7 +971,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - getPPCRegisterNumbering(MinFPR) + 1) * 8; + LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; } // Check whether the frame pointer register is allocated. If so, make sure it @@ -970,8 +1005,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinReg = - std::min<unsigned>(getPPCRegisterNumbering(MinGPR), - getPPCRegisterNumbering(MinG8R)); + std::min<unsigned>(TRI->getEncodingValue(MinGPR), + TRI->getEncodingValue(MinG8R)); if (Subtarget.isPPC64()) { LowerBound -= (31 - MinReg + 1) * 8; @@ -1031,6 +1066,44 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } } + + addScavengingSpillSlot(MF, RS); +} + +void +PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, + RegScavenger *RS) const { + // Reserve a slot closest to SP or frame pointer if we have a dynalloc or + // a large stack, which will require scavenging a register to materialize a + // large offset. + + // We need to have a scavenger spill slot for spills if the frame size is + // large. In case there is no free register for large-offset addressing, + // this slot is used for the necessary emergency spill. Also, we need the + // slot for dynamic stack allocations. + + // The scavenger might be invoked if the frame offset does not fit into + // the 16-bit immediate. We don't know the complete frame size here + // because we've not yet computed callee-saved register spills or the + // needed alignment padding. + unsigned StackSize = determineFrameLayout(MF, false, true); + MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || + hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC; + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + // These kinds of spills might need two registers. + if (spillsCR(MF) || spillsVRSAVE(MF)) + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + + } } bool @@ -1068,8 +1141,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // save slot via GPR12 (available in the prolog for 32- and 64-bit). if (Subtarget.isPPC64()) { // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::X12)); - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW)) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::MFCR8), PPC::X12)); + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::STW8)) .addReg(PPC::X12, getKillRegState(true)) .addImm(8) @@ -1109,7 +1182,7 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, if (isPPC64) { // 64-bit: SP+8 - MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ), PPC::X12) + MBB.insert(MI, BuildMI(*MF, DL, TII.get(PPC::LWZ8), PPC::X12) .addImm(8) .addReg(PPC::X1)); RestoreOp = PPC::MTCRF8; @@ -1125,15 +1198,15 @@ restoreCRs(bool isPPC64, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, if (CR2Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); if (CR3Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(!CR4Spilled))); if (CR4Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) - .addReg(MoveReg)); + .addReg(MoveReg, getKillRegState(true))); } void PPCFrameLowering:: diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index d09e47fafd..6f5f9368c6 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -32,7 +32,9 @@ public: Subtarget(sti) { } - void determineFrameLayout(MachineFunction &MF) const; + unsigned determineFrameLayout(MachineFunction &MF, + bool UpdateMF = true, + bool UseEstimate = false) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. @@ -41,10 +43,13 @@ public: bool hasFP(const MachineFunction &MF) const; bool needsFP(const MachineFunction &MF) const; + void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; + void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -144,6 +149,9 @@ public: return 0; } + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + static const SpillSlot Offsets[] = { // Floating-point register save area offsets. {PPC::F31, -8}, diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 6ed1fb9e6a..4bf1e33964 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -179,7 +179,7 @@ getHazardType(SUnit *SU, int Stalls) { } // Do not allow MTCTR and BCTRL to be in the same dispatch group. - if (HasCTRSet && (Opcode == PPC::BCTRL_Darwin || Opcode == PPC::BCTRL_SVR4)) + if (HasCTRSet && Opcode == PPC::BCTRL) return NoopHazard; // If this is a load following a store, make sure it's not to the same or diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 17bea8a6a6..95efc11b53 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -120,10 +120,10 @@ namespace { } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc - /// immediate field. Because preinc imms have already been validated, just - /// accept it. + /// immediate field. Note that the operand at this point is already the + /// result of a prior SelectAddressRegImm call. bool SelectAddrImmOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || + if (N.getOpcode() == ISD::TargetConstant || N.getOpcode() == ISD::TargetGlobalAddress) { Out = N; return true; @@ -132,18 +132,6 @@ namespace { return false; } - /// SelectAddrIdxOffs - Return true if the operand is valid for a preinc - /// index field. Because preinc imms have already been validated, just - /// accept it. - bool SelectAddrIdxOffs(SDValue N, SDValue &Out) const { - if (isa<ConstantSDNode>(N) || N.getOpcode() == PPCISD::Lo || - N.getOpcode() == ISD::TargetGlobalAddress) - return false; - - Out = N; - return true; - } - /// SelectAddrIdx - Given the specified addressed, check to see if it can be /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. @@ -164,6 +152,12 @@ namespace { return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG); } + // Select an address into a single register. + bool SelectAddr(SDValue N, SDValue &Base) { + Base = N; + return true; + } + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. It is always correct to compute the value into /// a register. The case of adding a (possibly relocatable) constant to a @@ -1050,7 +1044,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; SDValue Offset = LD->getOffset(); - if (isa<ConstantSDNode>(Offset) || + if (Offset.getOpcode() == ISD::TargetConstant || Offset.getOpcode() == ISD::TargetGlobalAddress) { unsigned Opcode; @@ -1117,7 +1111,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); - SDValue Ops[] = { Offset, Base, Chain }; + SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), PPCLowering.getPointerTy(), MVT::Other, Ops, 3); @@ -1483,8 +1477,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { default: continue; case PPC::ADDI8: - case PPC::ADDI8L: - case PPC::ADDIL: + case PPC::ADDI: // In some cases (such as TLS) the relocation information // is already in place on the operand, so copying the operand // is sufficient. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 13cb358fc0..2cceb3d312 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -57,6 +57,9 @@ cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); +static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned", +cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); + static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { if (TM.getSubtargetImpl()->isDarwin()) return new TargetLoweringObjectFileMachO(); @@ -67,6 +70,7 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); + PPCRegInfo = TM.getRegisterInfo(); setPow2DivIsCheap(); @@ -154,18 +158,45 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + if (Subtarget->hasFPRND()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + + // frin does not implement "ties to even." Thus, this is safe only in + // fast-math mode. + if (TM.Options.UnsafeFPMath) { + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + + // These need to set FE_INEXACT, and use a custom inserter. + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + } + } + // PowerPC does not have BSWAP, CTPOP or CTTZ setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); setOperationAction(ISD::BSWAP, MVT::i64 , Expand); - setOperationAction(ISD::CTPOP, MVT::i64 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + if (Subtarget->hasPOPCNTD()) { + setOperationAction(ISD::CTPOP, MVT::i32 , Legal); + setOperationAction(ISD::CTPOP, MVT::i64 , Legal); + } else { + setOperationAction(ISD::CTPOP, MVT::i32 , Expand); + setOperationAction(ISD::CTPOP, MVT::i64 , Expand); + } + // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); @@ -208,6 +239,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support + // SjLj exception handling but a light-weight setjmp/longjmp replacement to + // support continuation, user-level threading, and etc.. As a result, no + // other SjLj exception interfaces are implemented and please don't build + // your own exception handling based on them. + // LLVM/Clang supports zero-cost DWARF exception handling. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. @@ -287,15 +326,28 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - // FIXME: disable this lowered code. This generates 64-bit register values, - // and we don't model the fact that the top part is clobbered by calls. We - // need to flag these together so that the value isn't live across a call. - //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64()) + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } + // With the instructions enabled under FPCVT, we can do everything. + if (PPCSubTarget.hasFPCVT()) { + if (Subtarget->has64BitSupport()) { + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + } + + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + } + if (Subtarget->use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); @@ -508,7 +560,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) MaxStoresPerMemmoveOptSize = 8; setPrefFunctionAlignment(4); - BenefitFromCodePlacementOpt = true; } } @@ -554,16 +605,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; - case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; - case PPCISD::STD_32: return "PPCISD::STD_32"; - case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; - case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; - case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; - case PPCISD::NOP: return "PPCISD::NOP"; + case PPCISD::CALL: return "PPCISD::CALL"; + case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; - case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; + case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; + case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; + case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFCR: return "PPCISD::MFCR"; case PPCISD::VCMP: return "PPCISD::VCMP"; case PPCISD::VCMPo: return "PPCISD::VCMPo"; @@ -573,10 +621,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::MTFSB0: return "PPCISD::MTFSB0"; - case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::MTFSF: return "PPCISD::MTFSF"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; @@ -1028,7 +1073,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1077,7 +1122,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1140,7 +1185,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1178,15 +1223,19 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; + bool isLoad = true; SDValue Ptr; EVT VT; + unsigned Alignment; if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); - + Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); + Alignment = ST->getAlignment(); + isLoad = false; } else return false; @@ -1194,7 +1243,25 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (VT.isVector()) return false; - if (SelectAddressRegReg(Ptr, Offset, Base, DAG)) { + if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { + + // Common code will reject creating a pre-inc form if the base pointer + // is a frame index, or if N is a store and the base pointer is either + // the same as or a predecessor of the value being stored. Check for + // those situations here, and try with swapped Base/Offset instead. + bool Swap = false; + + if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base)) + Swap = true; + else if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) + Swap = true; + } + + if (Swap) + std::swap(Base, Offset); + AM = ISD::PRE_INC; return true; } @@ -1205,6 +1272,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) return false; } else { + // LDU/STU need an address with at least 4-byte alignment. + if (Alignment < 4) + return false; + // reg + imm * 4. if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) return false; @@ -3096,7 +3167,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Other); // Returns a chain NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin; + unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { @@ -3229,8 +3300,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Other); NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); - CallOpc = isSVR4ABI ? PPCISD::BCTRL_SVR4 : PPCISD::BCTRL_Darwin; + CallOpc = PPCISD::BCTRL; Callee.setNode(0); + // Add use of X11 (holding environment pointer) + if (isSVR4ABI && isPPC64) + Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); // Add CTR register as callee so a bctr can be emitted later. if (isTailCall) Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); @@ -3369,7 +3443,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - if (CallOpc == PPCISD::BCTRL_SVR4) { + if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. // See PrepareCall() for more information about calls through function @@ -3380,9 +3454,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. needsTOCRestore = true; - } else if ((CallOpc == PPCISD::CALL_SVR4) && !isLocalCall(Callee)) { + } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { // Otherwise insert NOP for non-local calls. - CallOpc = PPCISD::CALL_NOP_SVR4; + CallOpc = PPCISD::CALL_NOP; } } @@ -4555,6 +4629,21 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); } +SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, + DAG.getVTList(MVT::i32, MVT::Other), + Op.getOperand(0), Op.getOperand(1)); +} + +SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc DL = Op.getDebugLoc(); + return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { @@ -4642,37 +4731,72 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - PPCISD::FCTIDZ, + (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : + PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src); + assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && + "i64 FP_TO_UINT is supported only with FPCVT"); + Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : + PPCISD::FCTIDUZ, + dl, MVT::f64, Src); break; } // Convert the FP value to an int value through memory. - SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); + bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && + (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); + SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); + int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); + MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); // Emit a store to the stack slot. - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, - MachinePointerInfo(), false, false, 0); + SDValue Chain; + if (i32Stack) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); + SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; + Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + MVT::i32, MMO); + } else + Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, + MPI, false, false, 0); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias. - if (Op.getValueType() == MVT::i32) + if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); - return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), + MPI = MachinePointerInfo(); + } + + return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, false, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, +SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); + assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + "UINT_TO_FP is supported only with FPCVT"); + + // If we have FCFIDS, then use it when converting to single-precision. + // Otherwise, convert to double-prcision and then round. + unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDUS : PPCISD::FCFIDS) : + (Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::FCFIDU : PPCISD::FCFID); + MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT::f32 : MVT::f64; + if (Op.getOperand(0).getValueType() == MVT::i64) { SDValue SINT = Op.getOperand(0); // When converting to single-precision, we actually need to convert @@ -4686,6 +4810,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && + !PPCSubTarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this @@ -4719,44 +4844,69 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); + + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } assert(Op.getOperand(0).getValueType() == MVT::i32 && - "Unhandled SINT_TO_FP type in custom expander!"); + "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32, + SDValue Ld; + if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { + int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && + "Expected an i32 store"); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, 4, 4); + SDValue Ops[] = { Store, FIdx }; + Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? + PPCISD::LFIWZX : PPCISD::LFIWAX, + dl, DAG.getVTList(MVT::f64, MVT::Other), + Ops, 2, MVT::i32, MMO); + } else { + assert(PPCSubTarget.isPPC64() && + "i32->FP without LFIWAX supported only on PPC64"); + + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); + SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); + + SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0)); - // STD the extended value into the stack slot. - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, 8, 8); - SDValue Ops[] = { DAG.getEntryNode(), Ext64, FIdx }; - SDValue Store = - DAG.getMemIntrinsicNode(PPCISD::STD_32, dl, DAG.getVTList(MVT::Other), - Ops, 4, MVT::i64, MMO); - // Load the value as a double. - SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), - false, false, false, 0); + // STD the extended value into the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, 0); + + // Load the value as a double. + Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(FrameIdx), + false, false, false, 0); + } // FCFID it and return it. - SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); - if (Op.getValueType() == MVT::f32) + SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); + if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } @@ -5551,11 +5701,15 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, Op.getDebugLoc()); - case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. @@ -5609,50 +5763,8 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, MVT::f64, N->getOperand(0), DAG.getIntPtrConstant(1)); - // This sequence changes FPSCR to do round-to-zero, adds the two halves - // of the long double, and puts FPSCR back the way it was. We do not - // actually model FPSCR. - std::vector<EVT> NodeTys; - SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; - - NodeTys.push_back(MVT::f64); // Return register - NodeTys.push_back(MVT::Glue); // Returns a flag for later insns - Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); - MFFSreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(31, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = DAG.getConstant(30, MVT::i32); - Ops[1] = InFlag; - Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2); - InFlag = Result.getValue(0); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); // result of add - NodeTys.push_back(MVT::Glue); // Returns a flag - Ops[0] = Lo; - Ops[1] = Hi; - Ops[2] = InFlag; - Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3); - FPreg = Result.getValue(0); - InFlag = Result.getValue(1); - - NodeTys.clear(); - NodeTys.push_back(MVT::f64); - Ops[0] = DAG.getConstant(1, MVT::i32); - Ops[1] = MFFSreg; - Ops[2] = FPreg; - Ops[3] = InFlag; - Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4); - FPreg = Result.getValue(0); + // Add the two halves of the long double in round-to-zero mode. + SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); // We know the low half is about to be thrown away, so just use something // convenient. @@ -5744,7 +5856,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -5863,9 +5975,238 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, return BB; } +llvm::MachineBasicBlock* +PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = MBB; + ++I; + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + unsigned DstReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(RC->hasType(MVT::i32) && "Invalid destination!"); + unsigned mainDstReg = MRI.createVirtualRegister(RC); + unsigned restoreDstReg = MRI.createVirtualRegister(RC); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + // For v = setjmp(buf), we generate + // + // thisMBB: + // SjLjSetup mainMBB + // bl mainMBB + // v_restore = 1 + // b sinkMBB + // + // mainMBB: + // buf[LabelOffset] = LR + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // Note that the structure of the jmp_buf used here is not compatible + // with that used by libc, and is not designed to be. Specifically, it + // stores only those 'reserved' registers that LLVM does not otherwise + // understand how to spill. Also, by convention, by the time this + // intrinsic is called, Clang has already stored the frame address in the + // first slot of the buffer and stack address in the third. Following the + // X86 target code, we'll store the jump address in the second slot. We also + // need to save the TOC pointer (R2) to handle jumps between shared + // libraries, and that will be stored in the fourth slot. The thread + // identifier (R13) is not affected. + + // thisMBB: + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + // Prepare IP either in reg. + const TargetRegisterClass *PtrRC = getRegClassFor(PVT); + unsigned LabelReg = MRI.createVirtualRegister(PtrRC); + unsigned BufReg = MI->getOperand(1).getReg(); + + if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) + .addReg(PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Setup + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCL)).addMBB(mainMBB); + MIB.addRegMask(PPCRegInfo->getNoPreservedMask()); + + BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) + .addMBB(mainMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); + + thisMBB->addSuccessor(mainMBB, /* weight */ 0); + thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + + // mainMBB: + // mainDstReg = 0 + MIB = BuildMI(mainMBB, DL, + TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + + // Store IP + if (PPCSubTarget.isPPC64()) { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) + .addReg(LabelReg) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) + .addReg(LabelReg) + .addImm(LabelOffset) + .addReg(BufReg); + } + + MIB.setMemRefs(MMOBegin, MMOEnd); + + BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(PPC::PHI), DstReg) + .addReg(mainDstReg).addMBB(mainMBB) + .addReg(restoreDstReg).addMBB(thisMBB); + + MI->eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // Memory Reference + MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + + MVT PVT = getPointerTy(); + assert((PVT == MVT::i64 || PVT == MVT::i32) && + "Invalid Pointer Size!"); + + const TargetRegisterClass *RC = + (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + unsigned Tmp = MRI.createVirtualRegister(RC); + // Since FP is only updated here but NOT referenced, it's treated as GPR. + unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; + unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; + + MachineInstrBuilder MIB; + + const int64_t LabelOffset = 1 * PVT.getStoreSize(); + const int64_t SPOffset = 2 * PVT.getStoreSize(); + const int64_t TOCOffset = 3 * PVT.getStoreSize(); + + unsigned BufReg = MI->getOperand(0).getReg(); + + // Reload FP (the jumped-to function may not have had a + // frame pointer, and if so, then its r31 will be restored + // as necessary). + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) + .addImm(0) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) + .addImm(0) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload IP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) + .addImm(LabelOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) + .addImm(LabelOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // Reload SP + if (PVT == MVT::i64) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) + .addImm(SPOffset / 4) + .addReg(BufReg); + } else { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) + .addImm(SPOffset) + .addReg(BufReg); + } + MIB.setMemRefs(MMOBegin, MMOEnd); + + // FIXME: When we also support base pointers, that register must also be + // restored here. + + // Reload TOC + if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) + .addImm(TOCOffset / 4) + .addReg(BufReg); + + MIB.setMemRefs(MMOBegin, MMOEnd); + } + + // Jump + BuildMI(*MBB, MI, DL, + TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); + BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); + + MI->eraseFromParent(); + return MBB; +} + MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || + MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { + return emitEHSjLjSetJmp(MI, BB); + } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || + MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { + return emitEHSjLjLongJmp(MI, BB); + } + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -5883,24 +6224,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); - // The SelectPred is ((BI << 5) | BO) for a BCC - unsigned BO = SelectPred & 0xF; - assert((BO == 12 || BO == 4) && "invalid predicate BO field for isel"); - - unsigned TrueOpNo, FalseOpNo; - if (BO == 12) { - TrueOpNo = 2; - FalseOpNo = 3; - } else { - TrueOpNo = 3; - FalseOpNo = 2; - SelectPred = PPC::InvertPredicate((PPC::Predicate)SelectPred); + unsigned SubIdx; + bool SwapOps; + switch (SelectPred) { + default: llvm_unreachable("invalid predicate for isel"); + case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break; + case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break; + case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break; + case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break; + case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break; + case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break; + case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break; + case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break; } BuildMI(*BB, MI, dl, TII->get(OpCode), MI->getOperand(0).getReg()) - .addReg(MI->getOperand(TrueOpNo).getReg()) - .addReg(MI->getOperand(FalseOpNo).getReg()) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()); + .addReg(MI->getOperand(SwapOps? 3 : 2).getReg()) + .addReg(MI->getOperand(SwapOps? 2 : 3).getReg()) + .addReg(MI->getOperand(1).getReg(), 0, SubIdx); } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_CC_F4 || @@ -6133,7 +6474,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); - unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; + unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB @@ -6236,6 +6577,75 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) .addReg(ShiftReg); + } else if (MI->getOpcode() == PPC::FADDrtz) { + // This pseudo performs an FADD with rounding mode temporarily forced + // to round-to-zero. We emit this via custom inserter since the FPSCR + // is not modeled at the SelectionDAG level. + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src1 = MI->getOperand(1).getReg(); + unsigned Src2 = MI->getOperand(2).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + + // Save FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); + + // Set rounding mode to round-to-zero. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); + + // Perform addition. + BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); + + // Restore FPSCR value. + BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); + } else if (MI->getOpcode() == PPC::FRINDrint || + MI->getOpcode() == PPC::FRINSrint) { + bool isf32 = MI->getOpcode() == PPC::FRINSrint; + unsigned Dest = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + DebugLoc dl = MI->getDebugLoc(); + + MachineRegisterInfo &RegInfo = F->getRegInfo(); + unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); + + // Perform the rounding. + BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest) + .addReg(Src); + + // Compare the results. + BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg) + .addReg(Dest).addReg(Src); + + // If the results were not equal, then set the FPSCR XX bit. + MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, midMBB); + F->insert(It, exitMBB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + BuildMI(*BB, MI, dl, TII->get(PPC::BCC)) + .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB); + + BB->addSuccessor(midMBB); + BB->addSuccessor(exitMBB); + + BB = midMBB; + + // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set + // the FI bit here because that will not automatically set XX also, + // and XX is what libm interprets as the FE_INEXACT flag. + BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6); + BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); + + BB->addSuccessor(exitMBB); + + BB = exitMBB; } else { llvm_unreachable("Unexpected instr type to insert"); } @@ -6321,8 +6731,15 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); - Val = DAG.getNode(PPCISD::STFIWX, dl, MVT::Other, N->getOperand(0), Val, - N->getOperand(2), N->getOperand(3)); + SDValue Ops[] = { + N->getOperand(0), Val, N->getOperand(2), + DAG.getValueType(N->getOperand(1).getValueType()) + }; + + Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, + DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + cast<StoreSDNode>(N)->getMemoryVT(), + cast<StoreSDNode>(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } @@ -6332,7 +6749,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(1).getOpcode() == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (N->getOperand(1).getValueType() == MVT::i32 || - N->getOperand(1).getValueType() == MVT::i16)) { + N->getOperand(1).getValueType() == MVT::i16 || + (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && + TM.getSubtarget<PPCSubtarget>().isPPC64() && + N->getOperand(1).getValueType() == MVT::i64))) { SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) @@ -6353,7 +6773,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Turn BSWAP (LOAD) -> lhbrx/lwbrx. if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && - (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { + (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || + (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && + TM.getSubtarget<PPCSubtarget>().isPPC64() && + N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast<LoadSDNode>(Load); // Create the byte-swapping load. @@ -6364,8 +6787,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, - DAG.getVTList(MVT::i32, MVT::Other), Ops, 3, - LD->getMemoryVT(), LD->getMemOperand()); + DAG.getVTList(N->getValueType(0) == MVT::i64 ? + MVT::i64 : MVT::i32, MVT::Other), + Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; @@ -6622,6 +7046,9 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 + if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); + return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); @@ -6806,13 +7233,16 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); - bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects()) && - MFI->getStackSize() && - !MF.getFunction()->getAttributes(). - hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked); - unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : - (is31 ? PPC::R31 : PPC::R1); + + // Naked functions never have a frame pointer, and so we use r1. For all + // other functions, this decision must be delayed until during PEI. + unsigned FrameReg; + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) + FrameReg = isPPC64 ? PPC::X1 : PPC::R1; + else + FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) @@ -6851,6 +7281,32 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, + bool *Fast) const { + if (DisablePPCUnaligned) + return false; + + // PowerPC supports unaligned memory access for simple non-vector types. + // Although accessing unaligned addresses is not as efficient as accessing + // aligned addresses, it is generally more efficient than manual expansion, + // and generally only traps for software emulation when crossing page + // boundaries. + + if (!VT.isSimple()) + return false; + + if (VT.getSimpleVT().isVector()) + return false; + + if (VT == MVT::ppcf128) + return false; + + if (Fast) + *Fast = true; + + return true; +} + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3931384d89..6690899e5d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -16,6 +16,7 @@ #define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H #include "PPC.h" +#include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -35,14 +36,18 @@ namespace llvm { /// was temporarily in the f64 operand. FCFID, + /// Newer FCFID[US] integer-to-floating-point conversion instructions for + /// unsigned integers and single-precision outputs. + FCFIDU, FCFIDS, FCFIDUS, + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. - STFIWX, + /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for + /// unsigned integers. + FCTIDUZ, FCTIWUZ, // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. @@ -90,17 +95,10 @@ namespace llvm { /// code. SRL, SRA, SHL, - /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" - /// registers. - EXTSW_32, - /// CALL - A direct function call. - /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// CALL_NOP is a call with the special NOP which follows 64-bit /// SVR4 calls. - CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, - - /// NOP - Special NOP which follows 64-bit SVR4 calls. - NOP, + CALL, CALL_NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. @@ -108,7 +106,7 @@ namespace llvm { /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. - BCTRL_Darwin, BCTRL_SVR4, + BCTRL, /// Return with a flag operand, matched by 'blr' RET_FLAG, @@ -119,6 +117,12 @@ namespace llvm { /// are undefined. MFCR, + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* /// instructions. For lack of better number, we use the opcode number /// encoding for the OPC field to identify the compare. For example, 838 @@ -138,26 +142,13 @@ namespace llvm { /// an optional input flag argument. COND_BRANCH, - // The following 5 instructions are used only as part of the - // long double-to-int conversion sequence. - - /// OUTFLAG = MFFS F8RC - This moves the FPSCR (not modelled) into the - /// register. - MFFS, - - /// OUTFLAG = MTFSB0 INFLAG - This clears a bit in the FPSCR. - MTFSB0, - - /// OUTFLAG = MTFSB1 INFLAG - This sets a bit in the FPSCR. - MTFSB1, - - /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with - /// rounding towards zero. It has flags added so it won't move past the - /// FPSCR-setting instructions. + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding + /// towards zero. Used only as part of the long double-to-int + /// conversion sequence. FADDRTZ, - /// MTFSF = F8RC, INFLAG - This moves the register into the FPSCR. - MTFSF, + /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. + MFFS, /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and /// reserve indexed. This is used to implement atomic operations. @@ -243,14 +234,11 @@ namespace llvm { /// optimizations due to constant folding. VADD_SPLAT, - /// STD_32 - This is the STD instruction for use with "32-bit" registers. - STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or /// i32. - STBRX, + STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, @@ -258,6 +246,20 @@ namespace llvm { /// or i32. LBRX, + /// STFIWX - The STFIWX instruction. The first operand is an input token + /// chain, then an f64 value to store, then an address to store it to. + STFIWX, + + /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point + /// load which sign-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWAX, + + /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point + /// load which zero-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWZX, + /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model, /// produces an ADDIS8 instruction that adds the TOC base register to /// sym@toc@ha. @@ -321,6 +323,7 @@ namespace llvm { class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; + const PPCRegisterInfo *PPCRegInfo; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -395,6 +398,12 @@ namespace llvm { MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, + MachineBasicBlock *MBB) const; + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -449,6 +458,10 @@ namespace llvm { bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const; + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const; + /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd @@ -494,7 +507,7 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; @@ -604,6 +617,9 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 01201304f7..fa5b65f0ba 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -30,12 +30,7 @@ def symbolLo64 : Operand<i64> { let EncoderMethod = "getLO16Encoding"; } def tocentry : Operand<iPTR> { - let MIOperandInfo = (ops i32imm:$imm); -} -def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64 - let PrintMethod = "printMemRegImm"; - let EncoderMethod = "getMemRIXEncoding"; - let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg); + let MIOperandInfo = (ops i64imm:$imm); } def tlsreg : Operand<i64> { let EncoderMethod = "getTLSRegEncoding"; @@ -71,133 +66,112 @@ def HI48_64 : SDNodeXForm<imm, [{ // Calls. // +let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { + let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in + def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; +} + let Defs = [LR8] in def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>, PPC970_Unit_BRU; -// Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { - // Convenient aliases for call instructions - let Uses = [RM] in { - def BL8_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA8_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_Darwin (i64 imm:$func))]>; - } - let Uses = [CTR8, RM] in { - def BCTRL8_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_Darwin)]>, Requires<[In64BitMode]>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { + let Defs = [CTR8], Uses = [CTR8] in { + def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), + "bdz $dst">; + def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), + "bdnz $dst">; } } -// ELF 64 ABI Calls = Darwin ABI Calls -// Used to define BL8_ELF and BLA8_ELF let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL8_ELF : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. + def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", BrB, []>; // See Pat patterns below. - let isCodeGenOnly = 1 in - def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func), + "bla $func", BrB, [(PPCcall (i64 imm:$func))]>; + } + let Uses = [RM], isCodeGenOnly = 1 in { + def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func), "bl $func\n\tnop", BrB, []>; - let isCodeGenOnly = 1 in - def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func, tlsgd:$sym), "bl $func($sym)\n\tnop", BrB, []>; - let isCodeGenOnly = 1 in - def BL8_NOP_ELF_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, + def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24, (outs), (ins calltarget:$func, tlsgd:$sym), "bl $func($sym)\n\tnop", BrB, []>; - def BLA8_ELF : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; - - let isCodeGenOnly = 1 in - def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24, (outs), (ins aaddr:$func), "bla $func\n\tnop", BrB, - [(PPCcall_nop_SVR4 (i64 imm:$func))]>; + [(PPCcall_nop (i64 imm:$func))]>; } - let Uses = [X11, CTR8, RM] in { - def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_SVR4)]>, Requires<[In64BitMode]>; + let Uses = [CTR8, RM] in { + def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", BrB, [(PPCbctrl)]>, + Requires<[In64BitMode]>; } } // Calls -def : Pat<(PPCcall_Darwin (i64 tglobaladdr:$dst)), - (BL8_Darwin tglobaladdr:$dst)>; -def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), - (BL8_Darwin texternalsym:$dst)>; +def : Pat<(PPCcall (i64 tglobaladdr:$dst)), + (BL8 tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)), + (BL8_NOP tglobaladdr:$dst)>; -def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), - (BL8_ELF tglobaladdr:$dst)>; -def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), - (BL8_NOP_ELF tglobaladdr:$dst)>; - -def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), - (BL8_ELF texternalsym:$dst)>; -def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), - (BL8_NOP_ELF texternalsym:$dst)>; - -def : Pat<(PPCnop), - (NOP)>; +def : Pat<(PPCcall (i64 texternalsym:$dst)), + (BL8 texternalsym:$dst)>; +def : Pat<(PPCcall_nop (i64 texternalsym:$dst)), + (BL8_NOP texternalsym:$dst)>; // Atomic operations let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_ADD_I64", - [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_SUB_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_SUB_I64", - [(set G8RC:$dst, (atomic_load_sub_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_OR_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_OR_I64", - [(set G8RC:$dst, (atomic_load_or_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_XOR_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_XOR_I64", - [(set G8RC:$dst, (atomic_load_xor_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_AND_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_AND_i64", - [(set G8RC:$dst, (atomic_load_and_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_LOAD_NAND_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "#ATOMIC_LOAD_NAND_I64", - [(set G8RC:$dst, (atomic_load_nand_64 xoaddr:$ptr, G8RC:$incr))]>; + [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>; def ATOMIC_CMP_SWAP_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$old, G8RC:$new), "#ATOMIC_CMP_SWAP_I64", - [(set G8RC:$dst, - (atomic_cmp_swap_64 xoaddr:$ptr, G8RC:$old, G8RC:$new))]>; + [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>; def ATOMIC_SWAP_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$new), "#ATOMIC_SWAP_I64", - [(set G8RC:$dst, (atomic_swap_64 xoaddr:$ptr, G8RC:$new))]>; + [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>; } } // Instructions to support atomic operations def LDARX : XForm_1<31, 84, (outs G8RC:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", LdStLDARX, - [(set G8RC:$rD, (PPClarx xoaddr:$ptr))]>; + [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; let Defs = [CR0] in def STDCX : XForm_1<31, 214, (outs), (ins G8RC:$rS, memrr:$dst), "stdcx. $rS, $dst", LdStSTDCX, - [(PPCstcx G8RC:$rS, xoaddr:$dst)]>, + [(PPCstcx i64:$rS, xoaddr:$dst)]>, isDOT; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in @@ -216,17 +190,12 @@ def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset), "#TC_RETURNr8 $dst $offset", []>; +let isCodeGenOnly = 1 in { let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, - isIndirectBranch = 1, isCall = 1, Uses = [CTR8, RM] in { - let isReturn = 1 in { - def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In64BitMode]>; - } - - def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, - Requires<[In64BitMode]>; -} + isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in +def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, + Requires<[In64BitMode]>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, @@ -242,6 +211,8 @@ def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +} + def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm), (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>; @@ -251,20 +222,13 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm), (TCRETURNri8 CTRRC8:$dst, imm:$imm)>; -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { - let Defs = [CTR8], Uses = [CTR8] in { - def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst), - "bdz $dst">; - def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst), - "bdnz $dst">; - } -} -// 64-but CR instructions +// 64-bit CR instructions def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; +let isCodeGenOnly = 1 in def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM), "#MFCR8pseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -273,6 +237,18 @@ def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins), "mfcr $rT", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def EH_SjLj_SetJmp64 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP64", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In64BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP64", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In64BitMode]>; +} + //===----------------------------------------------------------------------===// // 64-bit SPR manipulation instrs. @@ -281,13 +257,13 @@ def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs G8RC:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(PPCmtctr G8RC:$rS)], Defs = [CTR8] in { +let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in { def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins G8RC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Pattern = [(set G8RC:$rT, readcyclecounter)] in +let Pattern = [(set i64:$rT, readcyclecounter)] in def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), "mfspr $rT, 268", SprMFTB>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -298,8 +274,8 @@ def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs G8RC:$rT), (ins), let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs G8RC:$result), (ins G8RC:$negsize, memri:$fpsi),"#DYNALLOC8", - [(set G8RC:$result, - (PPCdynalloc G8RC:$negsize, iaddr:$fpsi))]>; + [(set i64:$result, + (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins G8RC:$rS), @@ -321,131 +297,129 @@ let PPC970_Unit = 1 in { // FXU Operations. let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI8 : DForm_2_r0<14, (outs G8RC:$rD), (ins symbolLo64:$imm), "li $rD, $imm", IntSimple, - [(set G8RC:$rD, immSExt16:$imm)]>; + [(set i64:$rD, immSExt16:$imm)]>; def LIS8 : DForm_2_r0<15, (outs G8RC:$rD), (ins symbolHi64:$imm), "lis $rD, $imm", IntSimple, - [(set G8RC:$rD, imm16ShiftedSExt:$imm)]>; + [(set i64:$rD, imm16ShiftedSExt:$imm)]>; } // Logical ops. def NAND8: XForm_6<31, 476, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "nand $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (and G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>; def AND8 : XForm_6<31, 28, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "and $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (and G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (and i64:$rS, i64:$rB))]>; def ANDC8: XForm_6<31, 60, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "andc $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (and G8RC:$rS, (not G8RC:$rB)))]>; + [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>; def OR8 : XForm_6<31, 444, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "or $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (or G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (or i64:$rS, i64:$rB))]>; def NOR8 : XForm_6<31, 124, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "nor $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (or G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>; def ORC8 : XForm_6<31, 412, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "orc $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (or G8RC:$rS, (not G8RC:$rB)))]>; + [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>; def EQV8 : XForm_6<31, 284, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "eqv $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (not (xor G8RC:$rS, G8RC:$rB)))]>; + [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>; def XOR8 : XForm_6<31, 316, (outs G8RC:$rA), (ins G8RC:$rS, G8RC:$rB), "xor $rA, $rS, $rB", IntSimple, - [(set G8RC:$rA, (xor G8RC:$rS, G8RC:$rB))]>; + [(set i64:$rA, (xor i64:$rS, i64:$rB))]>; // Logical ops with immediate. def ANDIo8 : DForm_4<28, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, - [(set G8RC:$dst, (and G8RC:$src1, immZExt16:$src2))]>, + [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>, isDOT; def ANDISo8 : DForm_4<29, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IntGeneral, - [(set G8RC:$dst, (and G8RC:$src1,imm16ShiftedZExt:$src2))]>, + [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>, isDOT; def ORI8 : DForm_4<24, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (or G8RC:$src1, immZExt16:$src2))]>; + [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>; def ORIS8 : DForm_4<25, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (or G8RC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>; def XORI8 : DForm_4<26, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (xor G8RC:$src1, immZExt16:$src2))]>; + [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>; def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, - [(set G8RC:$dst, (xor G8RC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>; def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "add $rT, $rA, $rB", IntSimple, - [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (add i64:$rA, i64:$rB))]>; // ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the // initial-exec thread-local storage model. +let isCodeGenOnly = 1 in def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB), "add $rT, $rA, $rB@tls", IntSimple, - [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>; + [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>; let Defs = [CARRY] in { def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "addc $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (addc G8RC:$rA, G8RC:$rB))]>, + [(set i64:$rT, (addc i64:$rA, i64:$rB))]>, PPC970_DGroup_Cracked; def ADDIC8 : DForm_2<12, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set G8RC:$rD, (addc G8RC:$rA, immSExt16:$imm))]>; + [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>; } -def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), - "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDI8L : DForm_2<14, (outs G8RC:$rD), (ins G8RC:$rA, symbolLo64:$imm), +def ADDI8 : DForm_2<14, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolLo64:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, immSExt16:$imm))]>; -def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC:$rA, symbolHi64:$imm), + [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>; +def ADDIS8 : DForm_2<15, (outs G8RC:$rD), (ins G8RC_NOX0:$rA, symbolHi64:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set G8RC:$rD, (add G8RC:$rA, imm16ShiftedSExt:$imm))]>; + [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>; let Defs = [CARRY] in { def SUBFIC8: DForm_2< 8, (outs G8RC:$rD), (ins G8RC:$rA, s16imm64:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set G8RC:$rD, (subc immSExt16:$imm, G8RC:$rA))]>; + [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>; def SUBFC8 : XOForm_1<31, 8, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfc $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (subc G8RC:$rB, G8RC:$rA))]>, + [(set i64:$rT, (subc i64:$rB, i64:$rA))]>, PPC970_DGroup_Cracked; } def SUBF8 : XOForm_1<31, 40, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subf $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sub G8RC:$rB, G8RC:$rA))]>; + [(set i64:$rT, (sub i64:$rB, i64:$rA))]>; def NEG8 : XOForm_3<31, 104, 0, (outs G8RC:$rT), (ins G8RC:$rA), "neg $rT, $rA", IntSimple, - [(set G8RC:$rT, (ineg G8RC:$rA))]>; + [(set i64:$rT, (ineg i64:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "adde $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (adde i64:$rA, i64:$rB))]>; def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, -1))]>; + [(set i64:$rT, (adde i64:$rA, -1))]>; def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA), "addze $rT, $rA", IntGeneral, - [(set G8RC:$rT, (adde G8RC:$rA, 0))]>; + [(set i64:$rT, (adde i64:$rA, 0))]>; def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "subfe $rT, $rA, $rB", IntGeneral, - [(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>; + [(set i64:$rT, (sube i64:$rB, i64:$rA))]>; def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfme $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube -1, G8RC:$rA))]>; + [(set i64:$rT, (sube -1, i64:$rA))]>; def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA), "subfze $rT, $rA", IntGeneral, - [(set G8RC:$rT, (sube 0, G8RC:$rA))]>; + [(set i64:$rT, (sube 0, i64:$rA))]>; } def MULHD : XOForm_1<31, 73, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulhd $rT, $rA, $rB", IntMulHW, - [(set G8RC:$rT, (mulhs G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>; def MULHDU : XOForm_1<31, 9, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulhdu $rT, $rA, $rB", IntMulHWU, - [(set G8RC:$rT, (mulhu G8RC:$rA, G8RC:$rB))]>; + [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>; def CMPD : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins G8RC:$rA, G8RC:$rB), "cmpd $crD, $rA, $rB", IntCompare>, isPPC64; @@ -458,54 +432,60 @@ def CMPLDI : DForm_6_ext<10, (outs CRRC:$dst), (ins G8RC:$src1, u16imm:$src2), def SLD : XForm_6<31, 27, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "sld $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCshl G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64; def SRD : XForm_6<31, 539, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srd $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCsrl G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64; let Defs = [CARRY] in { def SRAD : XForm_6<31, 794, (outs G8RC:$rA), (ins G8RC:$rS, GPRC:$rB), "srad $rA, $rS, $rB", IntRotateD, - [(set G8RC:$rA, (PPCsra G8RC:$rS, GPRC:$rB))]>, isPPC64; + [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64; } def EXTSB8 : XForm_11<31, 954, (outs G8RC:$rA), (ins G8RC:$rS), "extsb $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i8))]>; + [(set i64:$rA, (sext_inreg i64:$rS, i8))]>; def EXTSH8 : XForm_11<31, 922, (outs G8RC:$rA), (ins G8RC:$rS), "extsh $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i16))]>; + [(set i64:$rA, (sext_inreg i64:$rS, i16))]>; def EXTSW : XForm_11<31, 986, (outs G8RC:$rA), (ins G8RC:$rS), "extsw $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext_inreg G8RC:$rS, i32))]>, isPPC64; -/// EXTSW_32 - Just like EXTSW, but works on '32-bit' registers. -def EXTSW_32 : XForm_11<31, 986, (outs GPRC:$rA), (ins GPRC:$rS), - "extsw $rA, $rS", IntSimple, - [(set GPRC:$rA, (PPCextsw_32 GPRC:$rS))]>, isPPC64; + [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64; def EXTSW_32_64 : XForm_11<31, 986, (outs G8RC:$rA), (ins GPRC:$rS), "extsw $rA, $rS", IntSimple, - [(set G8RC:$rA, (sext GPRC:$rS))]>, isPPC64; + [(set i64:$rA, (sext i32:$rS))]>, isPPC64; let Defs = [CARRY] in { def SRADI : XSForm_1<31, 413, (outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH), "sradi $rA, $rS, $SH", IntRotateDI, - [(set G8RC:$rA, (sra G8RC:$rS, (i32 imm:$SH)))]>, isPPC64; + [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64; } def CNTLZD : XForm_11<31, 58, (outs G8RC:$rA), (ins G8RC:$rS), "cntlzd $rA, $rS", IntGeneral, - [(set G8RC:$rA, (ctlz G8RC:$rS))]>; + [(set i64:$rA, (ctlz i64:$rS))]>; +def POPCNTD : XForm_11<31, 506, (outs G8RC:$rA), (ins G8RC:$rS), + "popcntd $rA, $rS", IntGeneral, + [(set i64:$rA, (ctpop i64:$rS))]>; + +// popcntw also does a population count on the high 32 bits (storing the +// results in the high 32-bits of the output). We'll ignore that here (which is +// safe because we never separately use the high part of the 64-bit registers). +def POPCNTW : XForm_11<31, 378, (outs GPRC:$rA), (ins GPRC:$rS), + "popcntw $rA, $rS", IntGeneral, + [(set i32:$rA, (ctpop i32:$rS))]>; def DIVD : XOForm_1<31, 489, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "divd $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (sdiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; def DIVDU : XOForm_1<31, 457, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "divdu $rT, $rA, $rB", IntDivD, - [(set G8RC:$rT, (udiv G8RC:$rA, G8RC:$rB))]>, isPPC64, + [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64, PPC970_DGroup_First, PPC970_DGroup_Cracked; def MULLD : XOForm_1<31, 233, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB), "mulld $rT, $rA, $rB", IntMulHD, - [(set G8RC:$rT, (mul G8RC:$rA, G8RC:$rB))]>, isPPC64; + [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64; let isCommutable = 1 in { @@ -536,7 +516,7 @@ def RLWINM8 : MForm_2<21, []>; def ISEL8 : AForm_4<31, 15, - (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB, pred:$cond), + (outs G8RC:$rT), (ins G8RC_NOX0:$rA, G8RC:$rB, CRBITRC:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } // End FXU Operations. @@ -551,94 +531,96 @@ def ISEL8 : AForm_4<31, 15, let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LHA8: DForm_1<42, (outs G8RC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi16 iaddr:$src))]>, + [(set i64:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LWA : DSForm_1<58, 2, (outs G8RC:$rD), (ins memrix:$src), "lwa $rD, $src", LdStLWA, - [(set G8RC:$rD, (sextloadi32 ixaddr:$src))]>, isPPC64, + [(set i64:$rD, + (aligned4sextloadi32 ixaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; def LHAX8: XForm_1<31, 343, (outs G8RC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi16 xaddr:$src))]>, + [(set i64:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src), "lwax $rD, $src", LdStLHA, - [(set G8RC:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, + [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64, PPC970_DGroup_Cracked; // Update forms. -let mayLoad = 1 in -def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp, - ptr_rc:$rA), - "lhau $rD, $disp($rA)", LdStLHAU, - []>, RegConstraint<"$rA = $ea_result">, +let mayLoad = 1 in { +def LHAU8 : DForm_1<43, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), + (ins memri:$addr), + "lhau $rD, $addr", LdStLHAU, + []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // NO LWAU! -def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHAUX8 : XForm_1<31, 375, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWAUX : XForm_1<31, 373, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } +} // Zero extending loads. let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi8 iaddr:$src))]>; + [(set i64:$rD, (zextloadi8 iaddr:$src))]>; def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi16 iaddr:$src))]>; + [(set i64:$rD, (zextloadi16 iaddr:$src))]>; def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; + [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64; def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi8 xaddr:$src))]>; + [(set i64:$rD, (zextloadi8 xaddr:$src))]>; def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi16 xaddr:$src))]>; + [(set i64:$rD, (zextloadi16 xaddr:$src))]>; def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, - [(set G8RC:$rD, (zextloadi32 xaddr:$src))]>; + [(set i64:$rD, (zextloadi32 xaddr:$src))]>; // Update forms. let mayLoad = 1 in { -def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc:$ea_result), +def LBZUX8 : XForm_1<31, 119, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc:$ea_result), +def LHZUX8 : XForm_1<31, 311, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), +def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } @@ -648,31 +630,28 @@ def LWZUX8 : XForm_1<31, 55, (outs G8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, - [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; -def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src), - "ld $rD, $src", LdStLD, - []>, isPPC64; + [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64; // The following three definitions are selected for small code model only. // Otherwise, we need to create two instructions to form a 32-bit offset, // so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select(). def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtoc", - [(set G8RC:$rD, - (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64; def LDtocJTI: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtocJTI", - [(set G8RC:$rD, - (PPCtoc_entry tjumptable:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64; def LDtocCPT: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), "#LDtocCPT", - [(set G8RC:$rD, - (PPCtoc_entry tconstpool:$disp, G8RC:$reg))]>, isPPC64; + [(set i64:$rD, + (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1 in { +let hasSideEffects = 1, isCodeGenOnly = 1 in { let RST = 2, DS = 2 in def LDinto_toc: DSForm_1a<58, 0, (outs), (ins G8RC:$reg), "ld 2, 8($reg)", LdStLD, - [(PPCload_toc G8RC:$reg)]>, isPPC64; + [(PPCload_toc i64:$reg)]>, isPPC64; let RST = 2, DS = 10, RA = 1 in def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), @@ -681,18 +660,21 @@ def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), } def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, - [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; - + [(set i64:$rD, (load xaddr:$src))]>, isPPC64; +def LDBRX : XForm_1<31, 532, (outs G8RC:$rD), (ins memrr:$src), + "ldbrx $rD, $src", LdStLoad, + [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64; + let mayLoad = 1 in -def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memrix:$addr), +def LDU : DSForm_1<58, 1, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr), "ldu $rD, $addr", LdStLDU, []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64, NoEncode<"$ea_result">; -def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc:$ea_result), +def LDUX : XForm_1<31, 53, (outs G8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "ldux $rD, $addr", LdStLDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">, isPPC64; } @@ -702,188 +684,167 @@ def : Pat<(PPCload xaddr:$src), (LDX xaddr:$src)>; // Support for medium and large code model. -def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), +def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), "#ADDIStocHA", - [(set G8RC:$rD, - (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>, + [(set i64:$rD, + (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>, isPPC64; -def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), +def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC_NOX0:$reg), "#LDtocL", - [(set G8RC:$rD, - (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; -def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp), + [(set i64:$rD, + (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64; +def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, tocentry:$disp), "#ADDItocL", - [(set G8RC:$rD, - (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64; + [(set i64:$rD, + (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64; // Support for thread-local storage. -def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def ADDISgotTprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDISgotTprelHA", - [(set G8RC:$rD, - (PPCaddisGotTprelHA G8RC:$reg, + [(set i64:$rD, + (PPCaddisGotTprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC:$reg), +def LDgotTprelL: Pseudo<(outs G8RC:$rD), (ins symbolLo64:$disp, G8RC_NOX0:$reg), "#LDgotTprelL", - [(set G8RC:$rD, - (PPCldGotTprelL tglobaltlsaddr:$disp, G8RC:$reg))]>, + [(set i64:$rD, + (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>, isPPC64; -def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g), - (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>; -def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g), + (ADD8TLS $in, tglobaltlsaddr:$g)>; +def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDIStlsgdHA", - [(set G8RC:$rD, - (PPCaddisTlsgdHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), +def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), "#ADDItlsgdL", - [(set G8RC:$rD, - (PPCaddiTlsgdL G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), "#GETtlsADDR", - [(set G8RC:$rD, - (PPCgetTlsAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + [(set i64:$rD, + (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def ADDIStlsldHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDIStlsldHA", - [(set G8RC:$rD, - (PPCaddisTlsldHA G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), +def ADDItlsldL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), "#ADDItlsldL", - [(set G8RC:$rD, - (PPCaddiTlsldL G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; def GETtlsldADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym), "#GETtlsldADDR", - [(set G8RC:$rD, - (PPCgetTlsldAddr G8RC:$reg, tglobaltlsaddr:$sym))]>, + [(set i64:$rD, + (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>, isPPC64; -def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp), +def ADDISdtprelHA: Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolHi64:$disp), "#ADDISdtprelHA", - [(set G8RC:$rD, - (PPCaddisDtprelHA G8RC:$reg, + [(set i64:$rD, + (PPCaddisDtprelHA i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; -def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp), +def ADDIdtprelL : Pseudo<(outs G8RC:$rD), (ins G8RC_NOX0:$reg, symbolLo64:$disp), "#ADDIdtprelL", - [(set G8RC:$rD, - (PPCaddiDtprelL G8RC:$reg, tglobaltlsaddr:$disp))]>, + [(set i64:$rD, + (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>, isPPC64; let PPC970_Unit = 2 in { // Truncating stores. def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src), "stb $rS, $src", LdStStore, - [(truncstorei8 G8RC:$rS, iaddr:$src)]>; + [(truncstorei8 i64:$rS, iaddr:$src)]>; def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src), "sth $rS, $src", LdStStore, - [(truncstorei16 G8RC:$rS, iaddr:$src)]>; + [(truncstorei16 i64:$rS, iaddr:$src)]>; def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src), "stw $rS, $src", LdStStore, - [(truncstorei32 G8RC:$rS, iaddr:$src)]>; + [(truncstorei32 i64:$rS, iaddr:$src)]>; def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, - [(truncstorei8 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei8 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, - [(truncstorei16 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei16 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, - [(truncstorei32 G8RC:$rS, xaddr:$dst)]>, + [(truncstorei32 i64:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; // Normal 8-byte stores. def STD : DSForm_1<62, 0, (outs), (ins G8RC:$rS, memrix:$dst), "std $rS, $dst", LdStSTD, - [(store G8RC:$rS, ixaddr:$dst)]>, isPPC64; + [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64; def STDX : XForm_8<31, 149, (outs), (ins G8RC:$rS, memrr:$dst), "stdx $rS, $dst", LdStSTD, - [(store G8RC:$rS, xaddr:$dst)]>, isPPC64, + [(store i64:$rS, xaddr:$dst)]>, isPPC64, + PPC970_DGroup_Cracked; +def STDBRX: XForm_8<31, 660, (outs), (ins G8RC:$rS, memrr:$dst), + "stdbrx $rS, $dst", LdStStore, + [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; } -let PPC970_Unit = 2 in { +// Stores with Update (pre-inc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrix:$dst), + "stdu $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">, + isPPC64; -def STBU8 : DForm_1a<39, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STWU8 : DForm_1a<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; - -def STDU : DSForm_1a<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS, - s16immX4:$ptroff, ptr_rc:$ptrreg), - "stdu $rS, $ptroff($ptrreg)", LdStSTDU, - [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">, - isPPC64; - - -def STBUX8 : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STHUX8 : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STWUX8 : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti32 G8RC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked; - -def STDUX : XForm_8<31, 181, (outs ptr_rc:$ea_res), - (ins G8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stdux $rS, $ptroff, $ptrreg", LdStSTDU, - [(set ptr_rc:$ea_res, - (pre_store G8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, +def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins G8RC:$rS, memrr:$dst), + "stdux $rS, $dst", LdStSTDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, PPC970_DGroup_Cracked, isPPC64; - -// STD_32/STDX_32 - Just like STD/STDX, but uses a '32-bit' input register. -def STD_32 : DSForm_1<62, 0, (outs), (ins GPRC:$rT, memrix:$dst), - "std $rT, $dst", LdStSTD, - [(PPCstd_32 GPRC:$rT, ixaddr:$dst)]>, isPPC64; -def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), - "stdx $rT, $dst", LdStSTD, - [(PPCstd_32 GPRC:$rT, xaddr:$dst)]>, isPPC64, - PPC970_DGroup_Cracked; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STDU $rS, iaddroff:$ptroff, $ptrreg)>; + +def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX8 $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STDUX $rS, $ptrreg, $ptroff)>; //===----------------------------------------------------------------------===// @@ -894,10 +855,26 @@ def STDX_32 : XForm_8<31, 149, (outs), (ins GPRC:$rT, memrr:$dst), let PPC970_Unit = 3, Uses = [RM] in { // FPU Operations. def FCFID : XForm_26<63, 846, (outs F8RC:$frD), (ins F8RC:$frB), "fcfid $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfcfid F8RC:$frB))]>, isPPC64; + [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), "fctidz $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfctidz F8RC:$frB))]>, isPPC64; + [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64; + +def FCFIDU : XForm_26<63, 974, (outs F8RC:$frD), (ins F8RC:$frB), + "fcfidu $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64; +def FCFIDS : XForm_26<59, 846, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfids $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64; +def FCFIDUS : XForm_26<59, 974, (outs F4RC:$frD), (ins F8RC:$frB), + "fcfidus $frD, $frB", FPGeneral, + [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64; +def FCTIDUZ : XForm_26<63, 943, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiduz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64; +def FCTIWUZ : XForm_26<63, 143, (outs F8RC:$frD), (ins F8RC:$frB), + "fctiwuz $frD, $frB", FPGeneral, + [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64; } @@ -906,13 +883,13 @@ def FCTIDZ : XForm_26<63, 815, (outs F8RC:$frD), (ins F8RC:$frB), // // Extensions and truncates to/from 32-bit regs. -def : Pat<(i64 (zext GPRC:$in)), - (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32), +def : Pat<(i64 (zext i32:$in)), + (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32), 0, 32)>; -def : Pat<(i64 (anyext GPRC:$in)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPRC:$in, sub_32)>; -def : Pat<(i32 (trunc G8RC:$in)), - (EXTRACT_SUBREG G8RC:$in, sub_32)>; +def : Pat<(i64 (anyext i32:$in)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>; +def : Pat<(i32 (trunc i64:$in)), + (EXTRACT_SUBREG $in, sub_32)>; // Extending loads with i64 targets. def : Pat<(zextloadi1 iaddr:$src), @@ -939,24 +916,24 @@ def : Pat<(extloadi32 xaddr:$src), // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 6-bit and 7-bit shift // amounts. -def : Pat<(sra G8RC:$rS, GPRC:$rB), - (SRAD G8RC:$rS, GPRC:$rB)>; -def : Pat<(srl G8RC:$rS, GPRC:$rB), - (SRD G8RC:$rS, GPRC:$rB)>; -def : Pat<(shl G8RC:$rS, GPRC:$rB), - (SLD G8RC:$rS, GPRC:$rB)>; +def : Pat<(sra i64:$rS, i32:$rB), + (SRAD $rS, $rB)>; +def : Pat<(srl i64:$rS, i32:$rB), + (SRD $rS, $rB)>; +def : Pat<(shl i64:$rS, i32:$rB), + (SLD $rS, $rB)>; // SHL/SRL -def : Pat<(shl G8RC:$in, (i32 imm:$imm)), - (RLDICR G8RC:$in, imm:$imm, (SHL64 imm:$imm))>; -def : Pat<(srl G8RC:$in, (i32 imm:$imm)), - (RLDICL G8RC:$in, (SRL64 imm:$imm), imm:$imm)>; +def : Pat<(shl i64:$in, (i32 imm:$imm)), + (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>; +def : Pat<(srl i64:$in, (i32 imm:$imm)), + (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>; // ROTL -def : Pat<(rotl G8RC:$in, GPRC:$sh), - (RLDCL G8RC:$in, GPRC:$sh, 0)>; -def : Pat<(rotl G8RC:$in, (i32 imm:$imm)), - (RLDICL G8RC:$in, imm:$imm, 0)>; +def : Pat<(rotl i64:$in, i32:$sh), + (RLDCL $in, $sh, 0)>; +def : Pat<(rotl i64:$in, (i32 imm:$imm)), + (RLDICL $in, imm:$imm, 0)>; // Hi and Lo for Darwin Global Addresses. def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>; @@ -967,15 +944,25 @@ def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>; -def : Pat<(PPChi tglobaltlsaddr:$g, G8RC:$in), - (ADDIS8 G8RC:$in, tglobaltlsaddr:$g)>; -def : Pat<(PPClo tglobaltlsaddr:$g, G8RC:$in), - (ADDI8L G8RC:$in, tglobaltlsaddr:$g)>; -def : Pat<(add G8RC:$in, (PPChi tglobaladdr:$g, 0)), - (ADDIS8 G8RC:$in, tglobaladdr:$g)>; -def : Pat<(add G8RC:$in, (PPChi tconstpool:$g, 0)), - (ADDIS8 G8RC:$in, tconstpool:$g)>; -def : Pat<(add G8RC:$in, (PPChi tjumptable:$g, 0)), - (ADDIS8 G8RC:$in, tjumptable:$g)>; -def : Pat<(add G8RC:$in, (PPChi tblockaddress:$g, 0)), - (ADDIS8 G8RC:$in, tblockaddress:$g)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in), + (ADDIS8 $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in), + (ADDI8 $in, tglobaltlsaddr:$g)>; +def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS8 $in, tglobaladdr:$g)>; +def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)), + (ADDIS8 $in, tconstpool:$g)>; +def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)), + (ADDIS8 $in, tjumptable:$g)>; +def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS8 $in, tblockaddress:$g)>; + +// Patterns to match r+r indexed loads and stores for +// addresses without at least 4-byte alignment. +def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)), + (LWAX xoaddr:$src)>; +def : Pat<(i64 (unaligned4load xoaddr:$src)), + (LDX xoaddr:$src)>; +def : Pat<(unaligned4store i64:$rS, xoaddr:$dst), + (STDX $rS, xoaddr:$dst)>; + diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 0cf28ae4b5..fff91df418 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -161,27 +161,90 @@ def vecspltisw : PatLeaf<(build_vector), [{ //===----------------------------------------------------------------------===// // Helpers for defining instructions that directly correspond to intrinsics. -// VA1a_Int - A VAForm_1a intrinsic definition. +// VA1a_Int - A VAForm_1a intrinsic definition of generic type. class VA1a_Int<bits<6> xo, string opc, Intrinsic IntID> : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB, VRRC:$vC))]>; -// VX1_Int - A VXForm_1 intrinsic definition. +// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type. +class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>; + +// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>; + +// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two +// input types and an output type. +class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VAForm_1a<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, VRRC:$vC), + !strconcat(opc, " $vD, $vA, $vB, $vC"), VecFP, + [(set OutTy:$vD, + (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>; + +// VX1_Int - A VXForm_1 intrinsic definition of generic type. class VX1_Int<bits<11> xo, string opc, Intrinsic IntID> : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), !strconcat(opc, " $vD, $vA, $vB"), VecFP, [(set VRRC:$vD, (IntID VRRC:$vA, VRRC:$vB))]>; -// VX2_Int - A VXForm_2 intrinsic definition. +// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type. +class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>; + +// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>; + +// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two +// input types and an output type. +class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType In1Ty, ValueType In2Ty> + : VXForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), + !strconcat(opc, " $vD, $vA, $vB"), VecFP, + [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>; + +// VX2_Int - A VXForm_1 intrinsic definition of generic type. class VX2_Int<bits<11> xo, string opc, Intrinsic IntID> : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), !strconcat(opc, " $vD, $vB"), VecFP, [(set VRRC:$vD, (IntID VRRC:$vB))]>; +// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type. +class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID> + : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + !strconcat(opc, " $vD, $vB"), VecFP, + [(set v4f32:$vD, (IntID v4f32:$vB))]>; + +// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the +// inputs doesn't match the type of the output. +class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy, + ValueType InTy> + : VXForm_2<xo, (outs VRRC:$vD), (ins VRRC:$vB), + !strconcat(opc, " $vD, $vB"), VecFP, + [(set OutTy:$vD, (IntID InTy:$vB))]>; + //===----------------------------------------------------------------------===// // Instruction Definitions. +def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +let Predicates = [HasAltivec] in { + +let isCodeGenOnly = 1 in { def DSS : DSS_Form<822, (outs), (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2), "dss $STRM", LdStLoad /*FIXME*/, []>; @@ -213,74 +276,79 @@ def DSTST64 : DSS_Form<374, (outs), def DSTSTT64 : DSS_Form<374, (outs), (ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB), "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>; +} def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins), "mfvscr $vD", LdStStore, - [(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>; + [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>; def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB), "mtvscr $vB", LdStLoad, - [(int_ppc_altivec_mtvscr VRRC:$vB)]>; + [(int_ppc_altivec_mtvscr v4i32:$vB)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads. def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src), "lvebx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; + [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>; def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src), "lvehx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; + [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>; def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src), "lvewx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>; def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src), "lvx $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>; def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src), "lvxl $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; + [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>; } def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src), "lvsl $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>, PPC970_Unit_LSU; def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src), "lvsr $vD, $src", LdStLoad, - [(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, + [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>, PPC970_Unit_LSU; let PPC970_Unit = 2 in { // Stores. def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst), "stvebx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>; def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst), "stvehx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>; def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst), "stvewx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>; def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst), "stvx $rS, $dst", LdStStore, - [(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>; def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst), "stvxl $rS, $dst", LdStStore, - [(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>; + [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>; } let PPC970_Unit = 5 in { // VALU Operations. // VA-Form instructions. 3-input AltiVec ops. def VMADDFP : VAForm_1<46, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vmaddfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fma VRRC:$vA, VRRC:$vC, VRRC:$vB))]>; + [(set v4f32:$vD, + (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>; def VNMSUBFP: VAForm_1<47, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vC, VRRC:$vB), "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fneg (fma VRRC:$vA, VRRC:$vC, - (fneg VRRC:$vB))))]>; + [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC, + (fneg v4f32:$vB))))]>; + +def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>; +def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs, + v8i16>; +def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>; -def VMHADDSHS : VA1a_Int<32, "vmhaddshs", int_ppc_altivec_vmhaddshs>; -def VMHRADDSHS : VA1a_Int<33, "vmhraddshs", int_ppc_altivec_vmhraddshs>; -def VMLADDUHM : VA1a_Int<34, "vmladduhm", int_ppc_altivec_vmladduhm>; -def VPERM : VA1a_Int<43, "vperm", int_ppc_altivec_vperm>; -def VSEL : VA1a_Int<42, "vsel", int_ppc_altivec_vsel>; +def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm, + v4i32, v4i32, v16i8>; +def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>; // Shuffles. def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), @@ -291,25 +359,25 @@ def VSLDOI : VAForm_2<44, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB, u5imm:$SH), // VX-Form instructions. AltiVec arithmetic ops. def VADDFP : VXForm_1<10, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vaddfp $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (fadd VRRC:$vA, VRRC:$vB))]>; + [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>; def VADDUBM : VXForm_1<0, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vaddubm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>; def VADDUHM : VXForm_1<64, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vadduhm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v8i16 VRRC:$vA), VRRC:$vB))]>; + [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>; def VADDUWM : VXForm_1<128, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vadduwm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (add (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>; -def VADDCUW : VX1_Int<384, "vaddcuw", int_ppc_altivec_vaddcuw>; -def VADDSBS : VX1_Int<768, "vaddsbs", int_ppc_altivec_vaddsbs>; -def VADDSHS : VX1_Int<832, "vaddshs", int_ppc_altivec_vaddshs>; -def VADDSWS : VX1_Int<896, "vaddsws", int_ppc_altivec_vaddsws>; -def VADDUBS : VX1_Int<512, "vaddubs", int_ppc_altivec_vaddubs>; -def VADDUHS : VX1_Int<576, "vadduhs", int_ppc_altivec_vadduhs>; -def VADDUWS : VX1_Int<640, "vadduws", int_ppc_altivec_vadduws>; +def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>; +def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>; +def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>; +def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>; +def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>; +def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>; +def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>; def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), @@ -322,20 +390,20 @@ def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfsx $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfsx VRRC:$vB, imm:$UIMM))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>; def VCFUX : VXForm_1<778, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vcfux $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfux VRRC:$vB, imm:$UIMM))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>; def VCTSXS : VXForm_1<970, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctsxs $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctsxs VRRC:$vB, imm:$UIMM))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>; def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vctuxs $vD, $vB, $UIMM", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctuxs VRRC:$vB, imm:$UIMM))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>; // Defines with the UIM field set to 0 for floating-point // to integer (fp_to_sint/fp_to_uint) conversions and integer @@ -343,49 +411,49 @@ def VCTUXS : VXForm_1<906, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), let VA = 0 in { def VCFSX_0 : VXForm_1<842, (outs VRRC:$vD), (ins VRRC:$vB), "vcfsx $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfsx VRRC:$vB, 0))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>; def VCTUXS_0 : VXForm_1<906, (outs VRRC:$vD), (ins VRRC:$vB), "vctuxs $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctuxs VRRC:$vB, 0))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>; def VCFUX_0 : VXForm_1<778, (outs VRRC:$vD), (ins VRRC:$vB), "vcfux $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vcfux VRRC:$vB, 0))]>; + [(set v4f32:$vD, + (int_ppc_altivec_vcfux v4i32:$vB, 0))]>; def VCTSXS_0 : VXForm_1<970, (outs VRRC:$vD), (ins VRRC:$vB), "vctsxs $vD, $vB, 0", VecFP, - [(set VRRC:$vD, - (int_ppc_altivec_vctsxs VRRC:$vB, 0))]>; + [(set v4i32:$vD, + (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>; } -def VEXPTEFP : VX2_Int<394, "vexptefp", int_ppc_altivec_vexptefp>; -def VLOGEFP : VX2_Int<458, "vlogefp", int_ppc_altivec_vlogefp>; - -def VAVGSB : VX1_Int<1282, "vavgsb", int_ppc_altivec_vavgsb>; -def VAVGSH : VX1_Int<1346, "vavgsh", int_ppc_altivec_vavgsh>; -def VAVGSW : VX1_Int<1410, "vavgsw", int_ppc_altivec_vavgsw>; -def VAVGUB : VX1_Int<1026, "vavgub", int_ppc_altivec_vavgub>; -def VAVGUH : VX1_Int<1090, "vavguh", int_ppc_altivec_vavguh>; -def VAVGUW : VX1_Int<1154, "vavguw", int_ppc_altivec_vavguw>; - -def VMAXFP : VX1_Int<1034, "vmaxfp", int_ppc_altivec_vmaxfp>; -def VMAXSB : VX1_Int< 258, "vmaxsb", int_ppc_altivec_vmaxsb>; -def VMAXSH : VX1_Int< 322, "vmaxsh", int_ppc_altivec_vmaxsh>; -def VMAXSW : VX1_Int< 386, "vmaxsw", int_ppc_altivec_vmaxsw>; -def VMAXUB : VX1_Int< 2, "vmaxub", int_ppc_altivec_vmaxub>; -def VMAXUH : VX1_Int< 66, "vmaxuh", int_ppc_altivec_vmaxuh>; -def VMAXUW : VX1_Int< 130, "vmaxuw", int_ppc_altivec_vmaxuw>; -def VMINFP : VX1_Int<1098, "vminfp", int_ppc_altivec_vminfp>; -def VMINSB : VX1_Int< 770, "vminsb", int_ppc_altivec_vminsb>; -def VMINSH : VX1_Int< 834, "vminsh", int_ppc_altivec_vminsh>; -def VMINSW : VX1_Int< 898, "vminsw", int_ppc_altivec_vminsw>; -def VMINUB : VX1_Int< 514, "vminub", int_ppc_altivec_vminub>; -def VMINUH : VX1_Int< 578, "vminuh", int_ppc_altivec_vminuh>; -def VMINUW : VX1_Int< 642, "vminuw", int_ppc_altivec_vminuw>; +def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>; +def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>; + +def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>; +def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>; +def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>; +def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>; +def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>; +def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>; + +def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>; +def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>; +def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>; +def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>; +def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>; +def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>; +def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>; +def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>; +def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>; +def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>; +def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>; +def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>; +def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>; +def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>; def VMRGHB : VXForm_1< 12, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrghb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGHH : VXForm_1< 76, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrghh $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vmrghh_shuffle VRRC:$vA, VRRC:$vB))]>; @@ -394,7 +462,7 @@ def VMRGHW : VXForm_1<140, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), [(set VRRC:$vD, (vmrghw_shuffle VRRC:$vA, VRRC:$vB))]>; def VMRGLB : VXForm_1<268, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglb $vD, $vA, $vB", VecFP, - [(set VRRC:$vD, (vmrglb_shuffle VRRC:$vA, VRRC:$vB))]>; + [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>; def VMRGLH : VXForm_1<332, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglh $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vmrglh_shuffle VRRC:$vA, VRRC:$vB))]>; @@ -402,55 +470,74 @@ def VMRGLW : VXForm_1<396, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vmrglw $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vmrglw_shuffle VRRC:$vA, VRRC:$vB))]>; -def VMSUMMBM : VA1a_Int<37, "vmsummbm", int_ppc_altivec_vmsummbm>; -def VMSUMSHM : VA1a_Int<40, "vmsumshm", int_ppc_altivec_vmsumshm>; -def VMSUMSHS : VA1a_Int<41, "vmsumshs", int_ppc_altivec_vmsumshs>; -def VMSUMUBM : VA1a_Int<36, "vmsumubm", int_ppc_altivec_vmsumubm>; -def VMSUMUHM : VA1a_Int<38, "vmsumuhm", int_ppc_altivec_vmsumuhm>; -def VMSUMUHS : VA1a_Int<39, "vmsumuhs", int_ppc_altivec_vmsumuhs>; - -def VMULESB : VX1_Int<776, "vmulesb", int_ppc_altivec_vmulesb>; -def VMULESH : VX1_Int<840, "vmulesh", int_ppc_altivec_vmulesh>; -def VMULEUB : VX1_Int<520, "vmuleub", int_ppc_altivec_vmuleub>; -def VMULEUH : VX1_Int<584, "vmuleuh", int_ppc_altivec_vmuleuh>; -def VMULOSB : VX1_Int<264, "vmulosb", int_ppc_altivec_vmulosb>; -def VMULOSH : VX1_Int<328, "vmulosh", int_ppc_altivec_vmulosh>; -def VMULOUB : VX1_Int< 8, "vmuloub", int_ppc_altivec_vmuloub>; -def VMULOUH : VX1_Int< 72, "vmulouh", int_ppc_altivec_vmulouh>; +def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm, + v4i32, v16i8, v4i32>; +def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm, + v4i32, v8i16, v4i32>; +def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs, + v4i32, v8i16, v4i32>; +def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm, + v4i32, v16i8, v4i32>; +def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm, + v4i32, v8i16, v4i32>; +def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs, + v4i32, v8i16, v4i32>; + +def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb, + v8i16, v16i8>; +def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh, + v4i32, v8i16>; +def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub, + v8i16, v16i8>; +def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh, + v4i32, v8i16>; +def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb, + v8i16, v16i8>; +def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh, + v4i32, v8i16>; +def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub, + v8i16, v16i8>; +def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh, + v4i32, v8i16>; -def VREFP : VX2_Int<266, "vrefp", int_ppc_altivec_vrefp>; -def VRFIM : VX2_Int<714, "vrfim", int_ppc_altivec_vrfim>; -def VRFIN : VX2_Int<522, "vrfin", int_ppc_altivec_vrfin>; -def VRFIP : VX2_Int<650, "vrfip", int_ppc_altivec_vrfip>; -def VRFIZ : VX2_Int<586, "vrfiz", int_ppc_altivec_vrfiz>; -def VRSQRTEFP : VX2_Int<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; +def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>; +def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>; +def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>; +def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>; +def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>; +def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>; -def VSUBCUW : VX1_Int<74, "vsubcuw", int_ppc_altivec_vsubcuw>; +def VSUBCUW : VX1_Int_Ty<74, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>; def VSUBFP : VXForm_1<74, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubfp $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (fsub VRRC:$vA, VRRC:$vB))]>; + [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>; def VSUBUBM : VXForm_1<1024, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsububm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v16i8 VRRC:$vA), VRRC:$vB))]>; + [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>; def VSUBUHM : VXForm_1<1088, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubuhm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v8i16 VRRC:$vA), VRRC:$vB))]>; + [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>; def VSUBUWM : VXForm_1<1152, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vsubuwm $vD, $vA, $vB", VecGeneral, - [(set VRRC:$vD, (sub (v4i32 VRRC:$vA), VRRC:$vB))]>; + [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>; -def VSUBSBS : VX1_Int<1792, "vsubsbs" , int_ppc_altivec_vsubsbs>; -def VSUBSHS : VX1_Int<1856, "vsubshs" , int_ppc_altivec_vsubshs>; -def VSUBSWS : VX1_Int<1920, "vsubsws" , int_ppc_altivec_vsubsws>; -def VSUBUBS : VX1_Int<1536, "vsububs" , int_ppc_altivec_vsububs>; -def VSUBUHS : VX1_Int<1600, "vsubuhs" , int_ppc_altivec_vsubuhs>; -def VSUBUWS : VX1_Int<1664, "vsubuws" , int_ppc_altivec_vsubuws>; -def VSUMSWS : VX1_Int<1928, "vsumsws" , int_ppc_altivec_vsumsws>; -def VSUM2SWS: VX1_Int<1672, "vsum2sws", int_ppc_altivec_vsum2sws>; -def VSUM4SBS: VX1_Int<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs>; -def VSUM4SHS: VX1_Int<1608, "vsum4shs", int_ppc_altivec_vsum4shs>; -def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>; +def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>; +def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>; +def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>; +def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>; +def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>; +def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>; + +def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>; +def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>; + +def VSUM4SBS: VX1_Int_Ty3<1672, "vsum4sbs", int_ppc_altivec_vsum4sbs, + v4i32, v16i8, v4i32>; +def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs, + v4i32, v8i16, v4i32>; +def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs, + v4i32, v16i8, v4i32>; def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vnor $vD, $vA, $vB", VecFP, @@ -463,15 +550,16 @@ def VXOR : VXForm_1<1220, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vxor $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (xor (v4i32 VRRC:$vA), VRRC:$vB))]>; -def VRLB : VX1_Int< 4, "vrlb", int_ppc_altivec_vrlb>; -def VRLH : VX1_Int< 68, "vrlh", int_ppc_altivec_vrlh>; -def VRLW : VX1_Int< 132, "vrlw", int_ppc_altivec_vrlw>; +def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>; +def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>; +def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>; -def VSL : VX1_Int< 452, "vsl" , int_ppc_altivec_vsl >; -def VSLO : VX1_Int<1036, "vslo", int_ppc_altivec_vslo>; -def VSLB : VX1_Int< 260, "vslb", int_ppc_altivec_vslb>; -def VSLH : VX1_Int< 324, "vslh", int_ppc_altivec_vslh>; -def VSLW : VX1_Int< 388, "vslw", int_ppc_altivec_vslw>; +def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >; +def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>; + +def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>; +def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>; +def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>; def VSPLTB : VXForm_1<524, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), "vspltb $vD, $vB, $UIMM", VecPerm, @@ -486,60 +574,74 @@ def VSPLTW : VXForm_1<652, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB), [(set VRRC:$vD, (vspltw_shuffle:$UIMM (v16i8 VRRC:$vB), (undef)))]>; -def VSR : VX1_Int< 708, "vsr" , int_ppc_altivec_vsr>; -def VSRO : VX1_Int<1100, "vsro" , int_ppc_altivec_vsro>; -def VSRAB : VX1_Int< 772, "vsrab", int_ppc_altivec_vsrab>; -def VSRAH : VX1_Int< 836, "vsrah", int_ppc_altivec_vsrah>; -def VSRAW : VX1_Int< 900, "vsraw", int_ppc_altivec_vsraw>; -def VSRB : VX1_Int< 516, "vsrb" , int_ppc_altivec_vsrb>; -def VSRH : VX1_Int< 580, "vsrh" , int_ppc_altivec_vsrh>; -def VSRW : VX1_Int< 644, "vsrw" , int_ppc_altivec_vsrw>; +def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>; +def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>; + +def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>; +def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>; +def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>; +def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>; +def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>; +def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>; def VSPLTISB : VXForm_3<780, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltisb $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v16i8 vecspltisb:$SIMM))]>; + [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>; def VSPLTISH : VXForm_3<844, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltish $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v8i16 vecspltish:$SIMM))]>; + [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>; def VSPLTISW : VXForm_3<908, (outs VRRC:$vD), (ins s5imm:$SIMM), "vspltisw $vD, $SIMM", VecPerm, - [(set VRRC:$vD, (v4i32 vecspltisw:$SIMM))]>; + [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>; // Vector Pack. -def VPKPX : VX1_Int<782, "vpkpx", int_ppc_altivec_vpkpx>; -def VPKSHSS : VX1_Int<398, "vpkshss", int_ppc_altivec_vpkshss>; -def VPKSHUS : VX1_Int<270, "vpkshus", int_ppc_altivec_vpkshus>; -def VPKSWSS : VX1_Int<462, "vpkswss", int_ppc_altivec_vpkswss>; -def VPKSWUS : VX1_Int<334, "vpkswus", int_ppc_altivec_vpkswus>; +def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx, + v8i16, v4i32>; +def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss, + v16i8, v8i16>; +def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus, + v16i8, v8i16>; +def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss, + v16i8, v4i32>; +def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus, + v8i16, v4i32>; def VPKUHUM : VXForm_1<14, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuhum $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vpkuhum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; -def VPKUHUS : VX1_Int<142, "vpkuhus", int_ppc_altivec_vpkuhus>; +def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus, + v16i8, v8i16>; def VPKUWUM : VXForm_1<78, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB), "vpkuwum $vD, $vA, $vB", VecFP, [(set VRRC:$vD, (vpkuwum_shuffle (v16i8 VRRC:$vA), VRRC:$vB))]>; -def VPKUWUS : VX1_Int<206, "vpkuwus", int_ppc_altivec_vpkuwus>; +def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus, + v8i16, v4i32>; // Vector Unpack. -def VUPKHPX : VX2_Int<846, "vupkhpx", int_ppc_altivec_vupkhpx>; -def VUPKHSB : VX2_Int<526, "vupkhsb", int_ppc_altivec_vupkhsb>; -def VUPKHSH : VX2_Int<590, "vupkhsh", int_ppc_altivec_vupkhsh>; -def VUPKLPX : VX2_Int<974, "vupklpx", int_ppc_altivec_vupklpx>; -def VUPKLSB : VX2_Int<654, "vupklsb", int_ppc_altivec_vupklsb>; -def VUPKLSH : VX2_Int<718, "vupklsh", int_ppc_altivec_vupklsh>; +def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx, + v4i32, v8i16>; +def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb, + v8i16, v16i8>; +def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh, + v4i32, v8i16>; +def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx, + v4i32, v8i16>; +def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb, + v8i16, v16i8>; +def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh, + v4i32, v8i16>; // Altivec Comparisons. class VCMP<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, - [(set VRRC:$vD, (Ty (PPCvcmp VRRC:$vA, VRRC:$vB, xo)))]>; + [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>; class VCMPo<bits<10> xo, string asmstr, ValueType Ty> : VXRForm_1<xo, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),asmstr,VecFPCompare, - [(set VRRC:$vD, (Ty (PPCvcmp_o VRRC:$vA, VRRC:$vB, xo)))]> { + [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> { let Defs = [CR6]; let RC = 1; } @@ -578,13 +680,14 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>; def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>; def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>; +let isCodeGenOnly = 1 in def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins), "vxor $vD, $vD, $vD", VecFP, [(set VRRC:$vD, (v4i32 immAllZerosV))]>; let IMM=-1 in { def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins), "vspltisw $vD, -1", VecFP, - [(set VRRC:$vD, (v4i32 immAllOnesV))]>; + [(set v4i32:$vD, (v4i32 immAllOnesV))]>; } } // VALU Operations. @@ -597,31 +700,31 @@ def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>; def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>; // * 32-bit -def : Pat<(int_ppc_altivec_dst GPRC:$rA, GPRC:$rB, imm:$STRM), - (DST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstt GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstst GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTST 0, imm:$STRM, GPRC:$rA, GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dststt GPRC:$rA, GPRC:$rB, imm:$STRM), - (DSTSTT 1, imm:$STRM, GPRC:$rA, GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM), + (DST 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM), + (DSTT 1, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM), + (DSTST 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM), + (DSTSTT 1, imm:$STRM, $rA, $rB)>; // * 64-bit -def : Pat<(int_ppc_altivec_dst G8RC:$rA, GPRC:$rB, imm:$STRM), - (DST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstt G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dstst G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTST64 0, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; -def : Pat<(int_ppc_altivec_dststt G8RC:$rA, GPRC:$rB, imm:$STRM), - (DSTSTT64 1, imm:$STRM, (i64 G8RC:$rA), GPRC:$rB)>; +def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM), + (DST64 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM), + (DSTT64 1, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM), + (DSTST64 0, imm:$STRM, $rA, $rB)>; +def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM), + (DSTSTT64 1, imm:$STRM, $rA, $rB)>; // Loads. def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>; // Stores. -def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst), - (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>; +def : Pat<(store v4i32:$rS, xoaddr:$dst), + (STVX $rS, xoaddr:$dst)>; // Bit conversions. def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>; @@ -643,93 +746,96 @@ def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>; // Shuffles. // Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x) -def:Pat<(vsldoi_unary_shuffle:$in (v16i8 VRRC:$vA), undef), - (VSLDOI VRRC:$vA, VRRC:$vA, (VSLDOI_unary_get_imm VRRC:$in))>; -def:Pat<(vpkuwum_unary_shuffle (v16i8 VRRC:$vA), undef), - (VPKUWUM VRRC:$vA, VRRC:$vA)>; -def:Pat<(vpkuhum_unary_shuffle (v16i8 VRRC:$vA), undef), - (VPKUHUM VRRC:$vA, VRRC:$vA)>; +def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef), + (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm VRRC:$in))>; +def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef), + (VPKUWUM $vA, $vA)>; +def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef), + (VPKUHUM $vA, $vA)>; // Match vmrg*(x,x) -def:Pat<(vmrglb_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglh_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrglw_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGLW VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghb_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHB VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghh_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHH VRRC:$vA, VRRC:$vA)>; -def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef), - (VMRGHW VRRC:$vA, VRRC:$vA)>; +def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef), + (VMRGLB $vA, $vA)>; +def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef), + (VMRGLH $vA, $vA)>; +def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef), + (VMRGLW $vA, $vA)>; +def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef), + (VMRGHB $vA, $vA)>; +def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef), + (VMRGHH $vA, $vA)>; +def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef), + (VMRGHW $vA, $vA)>; // Logical Operations -def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>; +def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR $vA, $vA)>; def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))), - (VNOR VRRC:$A, VRRC:$B)>; + (VNOR $A, $B)>; def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))), - (VANDC VRRC:$A, VRRC:$B)>; + (VANDC $A, $B)>; -def : Pat<(fmul VRRC:$vA, VRRC:$vB), - (VMADDFP VRRC:$vA, VRRC:$vB, +def : Pat<(fmul v4f32:$vA, v4f32:$vB), + (VMADDFP $vA, $vB, (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; // Fused multiply add and multiply sub for packed float. These are represented // separately from the real instructions above, for operations that must have // the additional precision, such as Newton-Rhapson (used by divide, sqrt) -def : Pat<(PPCvmaddfp VRRC:$A, VRRC:$B, VRRC:$C), - (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; -def : Pat<(PPCvnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), - (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; -def : Pat<(int_ppc_altivec_vmaddfp VRRC:$A, VRRC:$B, VRRC:$C), - (VMADDFP VRRC:$A, VRRC:$B, VRRC:$C)>; -def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C), - (VNMSUBFP VRRC:$A, VRRC:$B, VRRC:$C)>; +def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C), + (VMADDFP $A, $B, $C)>; +def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C), + (VNMSUBFP $A, $B, $C)>; def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC), - (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>; + (VPERM $vA, $vB, $vC)>; // Vector shifts -def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>; - -def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>; - -def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))), - (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>; -def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))), - (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>; -def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))), - (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>; +def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSLB $vA, $vB))>; +def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSLH $vA, $vB))>; +def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSLW $vA, $vB))>; + +def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRB $vA, $vB))>; +def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRH $vA, $vB))>; +def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRW $vA, $vB))>; + +def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)), + (v16i8 (VSRAB $vA, $vB))>; +def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)), + (v8i16 (VSRAH $vA, $vB))>; +def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)), + (v4i32 (VSRAW $vA, $vB))>; // Float to integer and integer to float conversions -def : Pat<(v4i32 (fp_to_sint (v4f32 VRRC:$vA))), - (VCTSXS_0 VRRC:$vA)>; -def : Pat<(v4i32 (fp_to_uint (v4f32 VRRC:$vA))), - (VCTUXS_0 VRRC:$vA)>; -def : Pat<(v4f32 (sint_to_fp (v4i32 VRRC:$vA))), - (VCFSX_0 VRRC:$vA)>; -def : Pat<(v4f32 (uint_to_fp (v4i32 VRRC:$vA))), - (VCFUX_0 VRRC:$vA)>; +def : Pat<(v4i32 (fp_to_sint v4f32:$vA)), + (VCTSXS_0 $vA)>; +def : Pat<(v4i32 (fp_to_uint v4f32:$vA)), + (VCTUXS_0 $vA)>; +def : Pat<(v4f32 (sint_to_fp v4i32:$vA)), + (VCFSX_0 $vA)>; +def : Pat<(v4f32 (uint_to_fp v4i32:$vA)), + (VCFUX_0 $vA)>; // Floating-point rounding -def : Pat<(v4f32 (ffloor (v4f32 VRRC:$vA))), - (VRFIM VRRC:$vA)>; -def : Pat<(v4f32 (fceil (v4f32 VRRC:$vA))), - (VRFIP VRRC:$vA)>; -def : Pat<(v4f32 (ftrunc (v4f32 VRRC:$vA))), - (VRFIZ VRRC:$vA)>; -def : Pat<(v4f32 (fnearbyint (v4f32 VRRC:$vA))), - (VRFIN VRRC:$vA)>; +def : Pat<(v4f32 (ffloor v4f32:$vA)), + (VRFIM $vA)>; +def : Pat<(v4f32 (fceil v4f32:$vA)), + (VRFIP $vA)>; +def : Pat<(v4f32 (ftrunc v4f32:$vA)), + (VRFIZ $vA)>; +def : Pat<(v4f32 (fnearbyint v4f32:$vA)), + (VRFIN $vA)>; + +} // end HasAltivec + diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index c3c171cd21..400b7e367b 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -120,6 +120,18 @@ class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL, let CR = 0; } +class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk, + dag OOL, dag IOL, string asmstr> + : I<opcode, OOL, IOL, asmstr, BrB> { + bits<14> BD; + + let Inst{6-10} = bo; + let Inst{11-15} = bi; + let Inst{16-29} = BD; + let Inst{30} = aa; + let Inst{31} = lk; +} + // 1.7.4 D-Form class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> @@ -664,14 +676,13 @@ class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr, // This is probably 1.7.9, but I don't have the reference that uses this // numbering scheme... class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, - string cstr, InstrItinClass itin, list<dag>pattern> + InstrItinClass itin, list<dag>pattern> : I<opcode, OOL, IOL, asmstr, itin> { bits<8> FM; bits<5> rT; bit RC = 0; // set by isDOT let Pattern = pattern; - let Constraints = cstr; let Inst{6} = 0; let Inst{7-14} = FM; @@ -765,16 +776,14 @@ class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr, bits<5> RT; bits<5> RA; bits<5> RB; - bits<7> BIBO; // 2 bits of BI and 5 bits of BO (must be 12). - bits<3> CR; + bits<5> COND; let Pattern = pattern; let Inst{6-10} = RT; let Inst{11-15} = RA; let Inst{16-20} = RB; - let Inst{21-23} = CR; - let Inst{24-25} = BIBO{6-5}; + let Inst{21-25} = COND; let Inst{26-30} = xo; let Inst{31} = 0; } @@ -987,6 +996,7 @@ class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr, //===----------------------------------------------------------------------===// class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> : I<0, OOL, IOL, asmstr, NoItinerary> { + let isCodeGenOnly = 1; let PPC64 = 0; let Pattern = pattern; let Inst{31-0} = 0; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index a0517a80a9..69c54ed084 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -33,11 +33,6 @@ #define GET_INSTRINFO_CTOR #include "PPCGenInstrInfo.inc" -namespace llvm { -extern cl::opt<bool> DisablePPC32RS; -extern cl::opt<bool> DisablePPC64RS; -} - using namespace llvm; static cl:: @@ -99,12 +94,18 @@ bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { + // Note: This list must be kept consistent with LoadRegFromStackSlot. switch (MI->getOpcode()) { default: break; case PPC::LD: case PPC::LWZ: case PPC::LFS: case PPC::LFD: + case PPC::RESTORE_CR: + case PPC::LVX: + case PPC::RESTORE_VRSAVE: + // Check for the operands added by addFrameReference (the immediate is the + // offset which defaults to 0). if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); @@ -117,12 +118,18 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { + // Note: This list must be kept consistent with StoreRegToStackSlot. switch (MI->getOpcode()) { default: break; case PPC::STD: case PPC::STW: case PPC::STFS: case PPC::STFD: + case PPC::SPILL_CR: + case PPC::STVX: + case PPC::SPILL_VRSAVE: + // Check for the operands added by addFrameReference (the immediate is the + // offset which defaults to 0). if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() && MI->getOperand(2).isFI()) { FrameIndex = MI->getOperand(2).getIndex(); @@ -444,40 +451,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const{ + // Note: If additional store instructions are added here, + // update isStoreToStackSlot. + DebugLoc DL; if (PPC::GPRCRegClass.hasSubClassEq(RC)) { - if (SrcReg != PPC::LR) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - } else { - // FIXME: this spills LR immediately to memory in one step. To do this, - // we use R11, which we know cannot be used in the prolog/epilog. This is - // a hack. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR), PPC::R11)); - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) - .addReg(PPC::R11, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { - if (SrcReg != PPC::LR8) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - } else { - // FIXME: this spills LR immediately to memory in one step. To do this, - // we use X11, which we know cannot be used in the prolog/epilog. This is - // a hack. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11)); - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) - .addReg(PPC::X11, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD)) .addReg(SrcReg, @@ -489,47 +478,11 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, getKillRegState(isKill)), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) - .addReg(SrcReg, - getKillRegState(isKill)), - FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - - bool is64Bit = TM.getSubtargetImpl()->isPPC64(); - // We need to store the CR in the low 4-bits of the saved value. First, - // issue a MFCR to save all of the CRBits. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - (is64Bit ? PPC::X2 : PPC::R2) : - (is64Bit ? PPC::X0 : PPC::R0); - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud : - PPC::MFCRpseud), ScratchReg) - .addReg(SrcReg, getKillRegState(isKill))); - - // If the saved register wasn't CR0, shift the bits left so that they are - // in CR0's slot. - if (SrcReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4; - // rlwinm scratch, scratch, ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 : - PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(ShiftBits) - .addImm(0).addImm(31)); - } - - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ? - PPC::STW8 : PPC::STW)) - .addReg(ScratchReg, - getKillRegState(isKill)), - FrameIdx)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { // FIXME: We use CRi here because there is no mtcrf on a bit. Since the // backend currently only uses CR1EQ as an individual bit, this should @@ -562,23 +515,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, Reg = PPC::CR7; return StoreRegToStackSlot(MF, Reg, isKill, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // STVX VAL, 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::STVX)) - .addReg(SrcReg, getKillRegState(isKill)) - .addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + assert(TM.getSubtargetImpl()->isDarwin() && + "VRSAVE only needs spill/restore on Darwin"); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) + .addReg(SrcReg, + getKillRegState(isKill)), + FrameIdx)); + SpillsVRS = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -595,10 +547,19 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); SmallVector<MachineInstr*, 4> NewMIs; - if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false, SpillsVRS = false; + if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs, + NonRI, SpillsVRS)) FuncInfo->setSpillsCR(); - } + + if (SpillsVRS) + FuncInfo->setSpillsVRSAVE(); + + if (NonRI) + FuncInfo->setHasNonRISpills(); for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -616,25 +577,17 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs)const{ + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const{ + // Note: If additional load instructions are added here, + // update isLoadFromStackSlot. + if (PPC::GPRCRegClass.hasSubClassEq(RC)) { - if (DestReg != PPC::LR) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - DestReg), FrameIdx)); - } else { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - PPC::R11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR)).addReg(PPC::R11)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), + DestReg), FrameIdx)); } else if (PPC::G8RCRegClass.hasSubClassEq(RC)) { - if (DestReg != PPC::LR8) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), - FrameIdx)); - } else { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), - PPC::X11), FrameIdx)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg), + FrameIdx)); } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) { NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg), FrameIdx)); @@ -642,37 +595,10 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg), FrameIdx)); } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) { - if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) || - (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, - get(PPC::RESTORE_CR), DestReg) - , FrameIdx)); - return true; - } else { - // FIXME: We need a scatch reg here. The trouble with using R0 is that - // it's possible for the stack frame to be so big the save location is - // out of range of immediate offsets, necessitating another register. - // We hack this on Darwin by reserving R2. It's probably broken on Linux - // at the moment. - unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ? - PPC::R2 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ), - ScratchReg), FrameIdx)); - - // If the reloaded register isn't CR0, shift the bits right so that they are - // in the right CR's slot. - if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; - // rlwinm r11, r11, 32-ShiftBits, 0, 31. - NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg) - .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0) - .addImm(31)); - } - - NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ? - PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(ScratchReg)); - } + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_CR), DestReg), + FrameIdx)); + return true; } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) { unsigned Reg = 0; @@ -702,21 +628,20 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, Reg = PPC::CR7; return LoadRegFromStackSlot(MF, DL, Reg, FrameIdx, - &PPC::CRRCRegClass, NewMIs); + &PPC::CRRCRegClass, NewMIs, NonRI, SpillsVRS); } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) { - // We don't have indexed addressing for vector loads. Emit: - // R0 = ADDI FI# - // Dest = LVX 0, R0 - // - // FIXME: We use R0 here, because it isn't available for RA. - bool Is64Bit = TM.getSubtargetImpl()->isPPC64(); - unsigned Instr = Is64Bit ? PPC::ADDI8 : PPC::ADDI; - unsigned GPR0 = Is64Bit ? PPC::X0 : PPC::R0; - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Instr), GPR0), - FrameIdx, 0, 0)); - NewMIs.push_back(BuildMI(MF, DL, get(PPC::LVX),DestReg).addReg(GPR0) - .addReg(GPR0)); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg), + FrameIdx)); + NonRI = true; + } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { + assert(TM.getSubtargetImpl()->isDarwin() && + "VRSAVE only needs spill/restore on Darwin"); + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, + get(PPC::RESTORE_VRSAVE), + DestReg), + FrameIdx)); + SpillsVRS = true; } else { llvm_unreachable("Unknown regclass!"); } @@ -734,10 +659,21 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, SmallVector<MachineInstr*, 4> NewMIs; DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); - if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) { - PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setHasSpills(); + + bool NonRI = false, SpillsVRS = false; + if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs, + NonRI, SpillsVRS)) FuncInfo->setSpillsCR(); - } + + if (SpillsVRS) + FuncInfo->setSpillsVRSAVE(); + + if (NonRI) + FuncInfo->setHasNonRISpills(); + for (unsigned i = 0, e = NewMIs.size(); i != e; ++i) MBB.insert(MI, NewMIs[i]); @@ -786,8 +722,8 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; - case PPC::BL8_NOP_ELF: - case PPC::BLA8_NOP_ELF: + case PPC::BL8_NOP: + case PPC::BLA8_NOP: return 8; default: return 4; // PowerPC instructions are all 4 bytes diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 374213ea43..635e3480b0 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -71,11 +71,13 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const; bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, unsigned DestReg, int FrameIdx, const TargetRegisterClass *RC, - SmallVectorImpl<MachineInstr*> &NewMIs) const; + SmallVectorImpl<MachineInstr*> &NewMIs, + bool &NonRI, bool &SpillsVRS) const; public: explicit PPCInstrInfo(PPCTargetMachine &TM); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 460e94342d..067f5aacfa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -20,6 +20,10 @@ include "PPCInstrFormats.td" def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx SDTCisVT<0, f64>, SDTCisPtrTy<1> ]>; +def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x + SDTCisVT<0, f64>, SDTCisPtrTy<1> +]>; + def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -36,10 +40,10 @@ def SDT_PPCcondbr : SDTypeProfile<0, 3, [ ]>; def SDT_PPClbrx : SDTypeProfile<1, 2, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPCstbrx : SDTypeProfile<0, 3, [ - SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> + SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; def SDT_PPClarx : SDTypeProfile<1, 1, [ @@ -53,32 +57,33 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; -def SDT_PPCnop : SDTypeProfile<0, 0, []>; //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // -def PPCfcfid : SDNode<"PPCISD::FCFID" , SDTFPUnaryOp, []>; +def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; +def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; +def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; +def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; +def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; +def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore]>; +def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; +def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, + [SDNPHasChain, SDNPMayLoad]>; + +// Extract FPSCR (not modeled at the DAG level). +def PPCmffs : SDNode<"PPCISD::MFFS", + SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>; + +// Perform FADD in round-to-zero mode. +def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; -// This sequence is used for long double->int conversions. It changes the -// bits in the FPSCR which is not modelled. -def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, - [SDNPOutGlue]>; -def PPCmtfsb0 : SDNode<"PPCISD::MTFSB0", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsb1 : SDNode<"PPCISD::MTFSB1", SDTypeProfile<0, 1, [SDTCisInt<0>]>, - [SDNPInGlue, SDNPOutGlue]>; -def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, - [SDNPInGlue, SDNPOutGlue]>; -def PPCmtfsf : SDNode<"PPCISD::MTFSF", SDTypeProfile<1, 3, - [SDTCisVT<0, f64>, SDTCisInt<1>, SDTCisVT<2, f64>, - SDTCisVT<3, f64>]>, - [SDNPInGlue]>; def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. @@ -113,10 +118,6 @@ def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; -def PPCextsw_32 : SDNode<"PPCISD::EXTSW_32" , SDTIntUnaryOp>; -def PPCstd_32 : SDNode<"PPCISD::STD_32" , SDTStore, - [SDNPHasChain, SDNPMayStore]>; - // These are target-independent nodes, but have target-specific formats. def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -124,16 +125,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; -def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; +def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, @@ -144,13 +141,9 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; +def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -158,6 +151,14 @@ def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, + SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>; @@ -278,6 +279,38 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue(); }], HI16>; +// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require +// restricted memrix (offset/4) constants are alignment sensitive. If these +// offsets are hidden behind TOC entries than the values of the lower-order +// bits cannot be checked directly. As a result, we need to also incorporate +// an alignment check into the relevant patterns. + +def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 4; +}]>; +def aligned4pre_store : PatFrag< + (ops node:$val, node:$base, node:$offset), + (pre_store node:$val, node:$base, node:$offset), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 4; +}]>; + +def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() < 4; +}]>; +def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() < 4; +}]>; //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -314,9 +347,6 @@ def s16imm : Operand<i32> { def u16imm : Operand<i32> { let PrintMethod = "printU16ImmOperand"; } -def s16immX4 : Operand<i32> { // Multiply imm by 4 before printing. - let PrintMethod = "printS16X4ImmOperand"; -} def directbrtarget : Operand<OtherVT> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; @@ -344,26 +374,37 @@ def crbitm: Operand<i8> { let EncoderMethod = "get_crbitm_encoding"; } // Address operands +// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode). +def ptr_rc_nor0 : PointerLikeRegClass<1>; + +def dispRI : Operand<iPTR>; +def dispRIX : Operand<iPTR>; + def memri : Operand<iPTR> { let PrintMethod = "printMemRegImm"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIEncoding"; } def memrr : Operand<iPTR> { let PrintMethod = "printMemRegReg"; - let MIOperandInfo = (ops ptr_rc:$offreg, ptr_rc:$ptrreg); + let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc:$offreg); } def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; - let MIOperandInfo = (ops symbolLo:$imm, ptr_rc:$reg); + let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg); let EncoderMethod = "getMemRIXEncoding"; } -// PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg -// that doesn't matter. -def pred : PredicateOperand<OtherVT, (ops imm, CRRC), - (ops (i32 20), (i32 zero_reg))> { +// A single-register address. This is used with the SjLj +// pseudo-instructions. +def memr : Operand<iPTR> { + let MIOperandInfo = (ops ptr_rc:$ptrreg); +} + +// PowerPC Predicate operand. +def pred : Operand<OtherVT> { let PrintMethod = "printPredicateOperand"; + let MIOperandInfo = (ops i32imm:$bibo, CRRC:$reg); } // Define PowerPC specific addressing mode. @@ -372,9 +413,12 @@ def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>; def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>; def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std" +// The address in a single register. This is used with the SjLj +// pseudo-instructions. +def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>; + /// This is just the offset part of iaddr, used for preinc. def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>; -def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. @@ -401,17 +445,22 @@ def UPDATE_VRSAVE : Pseudo<(outs GPRC:$rD), (ins GPRC:$rS), let Defs = [R1], Uses = [R1] in def DYNALLOC : Pseudo<(outs GPRC:$result), (ins GPRC:$negsize, memri:$fpsi), "#DYNALLOC", - [(set GPRC:$result, - (PPCdynalloc GPRC:$negsize, iaddr:$fpsi))]>; + [(set i32:$result, + (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. let usesCustomInserter = 1, // Expanded after instruction selection. PPC970_Single = 1 in { - def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, GPRC:$T, GPRC:$F, + // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes + // because either operand might become the first operand in an isel, and + // that operand cannot be r0. + def SELECT_CC_I4 : Pseudo<(outs GPRC:$dst), (ins CRRC:$cond, + GPRC_NOR0:$T, GPRC_NOR0:$F, i32imm:$BROPC), "#SELECT_CC_I4", []>; - def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, G8RC:$T, G8RC:$F, + def SELECT_CC_I8 : Pseudo<(outs G8RC:$dst), (ins CRRC:$cond, + G8RC_NOX0:$T, G8RC_NOX0:$F, i32imm:$BROPC), "#SELECT_CC_I8", []>; def SELECT_CC_F4 : Pseudo<(outs F4RC:$dst), (ins CRRC:$cond, F4RC:$T, F4RC:$F, @@ -438,10 +487,9 @@ def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F), "#RESTORE_CR", []>; let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in { - let isCodeGenOnly = 1, isReturn = 1, Uses = [LR, RM] in - def BLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$p), - "b${p:cc}lr ${p:reg}", BrB, - [(retflag)]>; + let isReturn = 1, Uses = [LR, RM] in + def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", BrB, + [(retflag)]>; let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>; } @@ -473,46 +521,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in { } } -// Darwin ABI Calls. -let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { - // Convenient aliases for call instructions - let Uses = [RM] in { - def BL_Darwin : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_Darwin : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, [(PPCcall_Darwin (i32 imm:$func))]>; - } - let Uses = [CTR, RM] in { - def BCTRL_Darwin : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_Darwin)]>, Requires<[In32BitMode]>; +// The direct BCL used by the SjLj setjmp code. +let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in { + let Defs = [LR], Uses = [RM] in { + def BCL : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst), + "bcl 20, 31, $dst">; } } -// SVR4 ABI Calls. let isCall = 1, PPC970_Unit = 7, Defs = [LR] in { // Convenient aliases for call instructions let Uses = [RM] in { - def BL_SVR4 : IForm<18, 0, 1, - (outs), (ins calltarget:$func), - "bl $func", BrB, []>; // See Pat patterns below. - def BLA_SVR4 : IForm<18, 1, 1, - (outs), (ins aaddr:$func), - "bla $func", BrB, - [(PPCcall_SVR4 (i32 imm:$func))]>; + def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func), + "bl $func", BrB, []>; // See Pat patterns below. + def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func), + "bla $func", BrB, [(PPCcall (i32 imm:$func))]>; } let Uses = [CTR, RM] in { - def BCTRL_SVR4 : XLForm_2_ext<19, 528, 20, 0, 1, - (outs), (ins), - "bctrl", BrB, - [(PPCbctrl_SVR4)]>, Requires<[In32BitMode]>; + def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins), + "bctrl", BrB, [(PPCbctrl)]>, + Requires<[In32BitMode]>; } } - let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in def TCRETURNdi :Pseudo< (outs), (ins calltarget:$dst, i32imm:$offset), @@ -531,6 +562,8 @@ def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset), []>; +let isCodeGenOnly = 1 in { + let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1, isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>, @@ -544,6 +577,7 @@ def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst), "b $dst", BrB, []>; +} let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in @@ -551,6 +585,22 @@ def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst), "ba $dst", BrB, []>; +let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in { + def EH_SjLj_SetJmp32 : Pseudo<(outs GPRC:$dst), (ins memr:$buf), + "#EH_SJLJ_SETJMP32", + [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>, + Requires<[In32BitMode]>; + let isTerminator = 1 in + def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf), + "#EH_SJLJ_LONGJMP32", + [(PPCeh_sjlj_longjmp addr:$buf)]>, + Requires<[In32BitMode]>; +} + +let isBranch = 1, isTerminator = 1 in { + def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst), + "#EH_SjLj_Setup\t$dst", []>; +} // DCB* instructions. def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), @@ -586,93 +636,90 @@ let usesCustomInserter = 1 in { let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I8", - [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I8", - [(set GPRC:$dst, (atomic_load_sub_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I8", - [(set GPRC:$dst, (atomic_load_and_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I8", - [(set GPRC:$dst, (atomic_load_or_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "ATOMIC_LOAD_XOR_I8", - [(set GPRC:$dst, (atomic_load_xor_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I8", - [(set GPRC:$dst, (atomic_load_nand_8 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I16", - [(set GPRC:$dst, (atomic_load_add_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I16", - [(set GPRC:$dst, (atomic_load_sub_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I16", - [(set GPRC:$dst, (atomic_load_and_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I16", - [(set GPRC:$dst, (atomic_load_or_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I16", - [(set GPRC:$dst, (atomic_load_xor_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I16", - [(set GPRC:$dst, (atomic_load_nand_16 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_ADD_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_ADD_I32", - [(set GPRC:$dst, (atomic_load_add_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_SUB_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_SUB_I32", - [(set GPRC:$dst, (atomic_load_sub_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_AND_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_AND_I32", - [(set GPRC:$dst, (atomic_load_and_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_OR_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_OR_I32", - [(set GPRC:$dst, (atomic_load_or_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_XOR_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_XOR_I32", - [(set GPRC:$dst, (atomic_load_xor_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_LOAD_NAND_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "#ATOMIC_LOAD_NAND_I32", - [(set GPRC:$dst, (atomic_load_nand_32 xoaddr:$ptr, GPRC:$incr))]>; + [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>; def ATOMIC_CMP_SWAP_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I8", - [(set GPRC:$dst, - (atomic_cmp_swap_8 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new", - [(set GPRC:$dst, - (atomic_cmp_swap_16 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_CMP_SWAP_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$old, GPRC:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new", - [(set GPRC:$dst, - (atomic_cmp_swap_32 xoaddr:$ptr, GPRC:$old, GPRC:$new))]>; + [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>; def ATOMIC_SWAP_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_i8", - [(set GPRC:$dst, (atomic_swap_8 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I16 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I16", - [(set GPRC:$dst, (atomic_swap_16 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>; def ATOMIC_SWAP_I32 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$new), "#ATOMIC_SWAP_I32", - [(set GPRC:$dst, (atomic_swap_32 xoaddr:$ptr, GPRC:$new))]>; + [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>; } } // Instructions to support atomic operations def LWARX : XForm_1<31, 20, (outs GPRC:$rD), (ins memrr:$src), "lwarx $rD, $src", LdStLWARX, - [(set GPRC:$rD, (PPClarx xoaddr:$src))]>; + [(set i32:$rD, (PPClarx xoaddr:$src))]>; let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst), "stwcx. $rS, $dst", LdStSTWCX, - [(PPCstcx GPRC:$rS, xoaddr:$dst)]>, + [(PPCstcx i32:$rS, xoaddr:$dst)]>, isDOT; let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in @@ -686,94 +733,94 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>; let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src), "lbz $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi8 iaddr:$src))]>; + [(set i32:$rD, (zextloadi8 iaddr:$src))]>; def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src), "lha $rD, $src", LdStLHA, - [(set GPRC:$rD, (sextloadi16 iaddr:$src))]>, + [(set i32:$rD, (sextloadi16 iaddr:$src))]>, PPC970_DGroup_Cracked; def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src), "lhz $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi16 iaddr:$src))]>; + [(set i32:$rD, (zextloadi16 iaddr:$src))]>; def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src), "lwz $rD, $src", LdStLoad, - [(set GPRC:$rD, (load iaddr:$src))]>; + [(set i32:$rD, (load iaddr:$src))]>; def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src), "lfs $rD, $src", LdStLFD, - [(set F4RC:$rD, (load iaddr:$src))]>; + [(set f32:$rD, (load iaddr:$src))]>; def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src), "lfd $rD, $src", LdStLFD, - [(set F8RC:$rD, (load iaddr:$src))]>; + [(set f64:$rD, (load iaddr:$src))]>; // Unindexed (r+i) Loads with Update (preinc). let mayLoad = 1 in { -def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lbzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhau $rD, $addr", LdStLHAU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lhzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lwzu $rD, $addr", LdStLoadUpd, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFSU : DForm_1<49, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfsu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; -def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr), +def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr), "lfdu $rD, $addr", LdStLFDU, []>, RegConstraint<"$addr.reg = $ea_result">, NoEncode<"$ea_result">; // Indexed (r+r) Loads with Update (preinc). -def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc:$ea_result), +def LBZUX : XForm_1<31, 119, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lbzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHAUX : XForm_1<31, 375, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhaux $rD, $addr", LdStLHAU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc:$ea_result), +def LHZUX : XForm_1<31, 311, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lhzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc:$ea_result), +def LWZUX : XForm_1<31, 55, (outs GPRC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lwzux $rD, $addr", LdStLoadUpd, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc:$ea_result), +def LFSUX : XForm_1<31, 567, (outs F4RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfsux $rD, $addr", LdStLFDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; -def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), +def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc_nor0:$ea_result), (ins memrr:$addr), "lfdux $rD, $addr", LdStLFDU, - []>, RegConstraint<"$addr.offreg = $ea_result">, + []>, RegConstraint<"$addr.ptrreg = $ea_result">, NoEncode<"$ea_result">; } } @@ -783,32 +830,39 @@ def LFDUX : XForm_1<31, 631, (outs F8RC:$rD, ptr_rc:$ea_result), let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src), "lbzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi8 xaddr:$src))]>; + [(set i32:$rD, (zextloadi8 xaddr:$src))]>; def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src), "lhax $rD, $src", LdStLHA, - [(set GPRC:$rD, (sextloadi16 xaddr:$src))]>, + [(set i32:$rD, (sextloadi16 xaddr:$src))]>, PPC970_DGroup_Cracked; def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src), "lhzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (zextloadi16 xaddr:$src))]>; + [(set i32:$rD, (zextloadi16 xaddr:$src))]>; def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src), "lwzx $rD, $src", LdStLoad, - [(set GPRC:$rD, (load xaddr:$src))]>; + [(set i32:$rD, (load xaddr:$src))]>; def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src), "lhbrx $rD, $src", LdStLoad, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>; + [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>; def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src), "lwbrx $rD, $src", LdStLoad, - [(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>; + [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>; def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src), "lfsx $frD, $src", LdStLFD, - [(set F4RC:$frD, (load xaddr:$src))]>; + [(set f32:$frD, (load xaddr:$src))]>; def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), "lfdx $frD, $src", LdStLFD, - [(set F8RC:$frD, (load xaddr:$src))]>; + [(set f64:$frD, (load xaddr:$src))]>; + +def LFIWAX : XForm_25<31, 855, (outs F8RC:$frD), (ins memrr:$src), + "lfiwax $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwax xoaddr:$src))]>; +def LFIWZX : XForm_25<31, 887, (outs F8RC:$frD), (ins memrr:$src), + "lfiwzx $frD, $src", LdStLFD, + [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>; } //===----------------------------------------------------------------------===// @@ -819,137 +873,128 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src), let PPC970_Unit = 2 in { def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src), "stb $rS, $src", LdStStore, - [(truncstorei8 GPRC:$rS, iaddr:$src)]>; + [(truncstorei8 i32:$rS, iaddr:$src)]>; def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src), "sth $rS, $src", LdStStore, - [(truncstorei16 GPRC:$rS, iaddr:$src)]>; + [(truncstorei16 i32:$rS, iaddr:$src)]>; def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src), "stw $rS, $src", LdStStore, - [(store GPRC:$rS, iaddr:$src)]>; + [(store i32:$rS, iaddr:$src)]>; def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst), "stfs $rS, $dst", LdStSTFD, - [(store F4RC:$rS, iaddr:$dst)]>; + [(store f32:$rS, iaddr:$dst)]>; def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst), "stfd $rS, $dst", LdStSTFD, - [(store F8RC:$rS, iaddr:$dst)]>; + [(store f64:$rS, iaddr:$dst)]>; } // Unindexed (r+i) Stores with Update (preinc). -let PPC970_Unit = 2 in { -def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stbu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "sthu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stwu $rS, $ptroff($ptrreg)", LdStStoreUpd, - [(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfsu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; -def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS, - symbolLo:$ptroff, ptr_rc:$ptrreg), - "stfdu $rS, $ptroff($ptrreg)", LdStSTFDU, - [(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg, - iaddroff:$ptroff))]>, - RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">; +let PPC970_Unit = 2, mayStore = 1 in { +def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stbu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "sthu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFSU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memri:$dst), + "stfsu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; +def STFDU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memri:$dst), + "stfdu $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">; } +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STBU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STHU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STWU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFSU $rS, iaddroff:$ptroff, $ptrreg)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff), + (STFDU $rS, iaddroff:$ptroff, $ptrreg)>; // Indexed (r+r) Stores. -// let PPC970_Unit = 2 in { def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst), "stbx $rS, $dst", LdStStore, - [(truncstorei8 GPRC:$rS, xaddr:$dst)]>, + [(truncstorei8 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst), "sthx $rS, $dst", LdStStore, - [(truncstorei16 GPRC:$rS, xaddr:$dst)]>, + [(truncstorei16 i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst), "stwx $rS, $dst", LdStStore, - [(store GPRC:$rS, xaddr:$dst)]>, - PPC970_DGroup_Cracked; - -def STBUX : XForm_8<31, 247, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stbux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti8 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, + [(store i32:$rS, xaddr:$dst)]>, PPC970_DGroup_Cracked; -def STHUX : XForm_8<31, 439, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "sthux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_truncsti16 GPRC:$rS, - ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STWUX : XForm_8<31, 183, (outs ptr_rc:$ea_res), - (ins GPRC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stwux $rS, $ptroff, $ptrreg", LdStStoreUpd, - [(set ptr_rc:$ea_res, - (pre_store GPRC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFSUX : XForm_8<31, 695, (outs ptr_rc:$ea_res), - (ins F4RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfsux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F4RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - -def STFDUX : XForm_8<31, 759, (outs ptr_rc:$ea_res), - (ins F8RC:$rS, ptr_rc:$ptroff, ptr_rc:$ptrreg), - "stfdux $rS, $ptroff, $ptrreg", LdStSTFDU, - [(set ptr_rc:$ea_res, - (pre_store F8RC:$rS, ptr_rc:$ptrreg, xaddroff:$ptroff))]>, - RegConstraint<"$ptroff = $ea_res">, NoEncode<"$ea_res">, - PPC970_DGroup_Cracked; - def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst), "sthbrx $rS, $dst", LdStStore, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>, + [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>, PPC970_DGroup_Cracked; def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst), "stwbrx $rS, $dst", LdStStore, - [(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>, + [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>, PPC970_DGroup_Cracked; def STFIWX: XForm_28<31, 983, (outs), (ins F8RC:$frS, memrr:$dst), "stfiwx $frS, $dst", LdStSTFD, - [(PPCstfiwx F8RC:$frS, xoaddr:$dst)]>; + [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; def STFSX : XForm_28<31, 663, (outs), (ins F4RC:$frS, memrr:$dst), "stfsx $frS, $dst", LdStSTFD, - [(store F4RC:$frS, xaddr:$dst)]>; + [(store f32:$frS, xaddr:$dst)]>; def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst), "stfdx $frS, $dst", LdStSTFD, - [(store F8RC:$frS, xaddr:$dst)]>; + [(store f64:$frS, xaddr:$dst)]>; } +// Indexed (r+r) Stores with Update (preinc). +let PPC970_Unit = 2, mayStore = 1 in { +def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stbux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "sthux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins GPRC:$rS, memrr:$dst), + "stwux $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins F4RC:$rS, memrr:$dst), + "stfsux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins F8RC:$rS, memrr:$dst), + "stfdux $rS, $dst", LdStSTFDU, []>, + RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">, + PPC970_DGroup_Cracked; +} + +// Patterns to match the pre-inc stores. We can't put the patterns on +// the instruction definitions directly as ISel wants the address base +// and offset to be separate operands, not a single complex operand. +def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STBUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STHUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STWUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFSUX $rS, $ptrreg, $ptroff)>; +def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff), + (STFDUX $rS, $ptrreg, $ptroff)>; + def SYNC : XForm_24_sync<31, 598, (outs), (ins), "sync", LdStSync, [(int_ppc_sync)]>; @@ -959,68 +1004,66 @@ def SYNC : XForm_24_sync<31, 598, (outs), (ins), // let PPC970_Unit = 1 in { // FXU Operations. -def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), +def ADDI : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$imm), "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; -def ADDIL : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$imm), - "addi $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def ADDIC : DForm_2<12, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic $rD, $rA, $imm", IntGeneral, - [(set GPRC:$rD, (addc GPRC:$rA, immSExt16:$imm))]>, + [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>, PPC970_DGroup_Cracked; def ADDICo : DForm_2<13, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "addic. $rD, $rA, $imm", IntGeneral, []>; } -def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC:$rA, symbolHi:$imm), +def ADDIS : DForm_2<15, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolHi:$imm), "addis $rD, $rA, $imm", IntSimple, - [(set GPRC:$rD, (add GPRC:$rA, imm16ShiftedSExt:$imm))]>; -def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC:$rA, symbolLo:$sym), + [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>; +let isCodeGenOnly = 1 in +def LA : DForm_2<14, (outs GPRC:$rD), (ins GPRC_NOR0:$rA, symbolLo:$sym), "la $rD, $sym($rA)", IntGeneral, - [(set GPRC:$rD, (add GPRC:$rA, + [(set i32:$rD, (add i32:$rA, (PPClo tglobaladdr:$sym, 0)))]>; def MULLI : DForm_2< 7, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "mulli $rD, $rA, $imm", IntMulLI, - [(set GPRC:$rD, (mul GPRC:$rA, immSExt16:$imm))]>; + [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>; let Defs = [CARRY] in { def SUBFIC : DForm_2< 8, (outs GPRC:$rD), (ins GPRC:$rA, s16imm:$imm), "subfic $rD, $rA, $imm", IntGeneral, - [(set GPRC:$rD, (subc immSExt16:$imm, GPRC:$rA))]>; + [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in { def LI : DForm_2_r0<14, (outs GPRC:$rD), (ins symbolLo:$imm), "li $rD, $imm", IntSimple, - [(set GPRC:$rD, immSExt16:$imm)]>; + [(set i32:$rD, immSExt16:$imm)]>; def LIS : DForm_2_r0<15, (outs GPRC:$rD), (ins symbolHi:$imm), "lis $rD, $imm", IntSimple, - [(set GPRC:$rD, imm16ShiftedSExt:$imm)]>; + [(set i32:$rD, imm16ShiftedSExt:$imm)]>; } } let PPC970_Unit = 1 in { // FXU Operations. def ANDIo : DForm_4<28, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "andi. $dst, $src1, $src2", IntGeneral, - [(set GPRC:$dst, (and GPRC:$src1, immZExt16:$src2))]>, + [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>, isDOT; def ANDISo : DForm_4<29, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "andis. $dst, $src1, $src2", IntGeneral, - [(set GPRC:$dst, (and GPRC:$src1,imm16ShiftedZExt:$src2))]>, + [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>, isDOT; def ORI : DForm_4<24, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "ori $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (or GPRC:$src1, immZExt16:$src2))]>; + [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>; def ORIS : DForm_4<25, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "oris $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (or GPRC:$src1, imm16ShiftedZExt:$src2))]>; + [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>; def XORI : DForm_4<26, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "xori $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (xor GPRC:$src1, immZExt16:$src2))]>; + [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>; def XORIS : DForm_4<27, (outs GPRC:$dst), (ins GPRC:$src1, u16imm:$src2), "xoris $dst, $src1, $src2", IntSimple, - [(set GPRC:$dst, (xor GPRC:$src1,imm16ShiftedZExt:$src2))]>; + [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>; def NOP : DForm_4_zero<24, (outs), (ins), "nop", IntSimple, []>; def CMPWI : DForm_5_ext<11, (outs CRRC:$crD), (ins GPRC:$rA, s16imm:$imm), @@ -1033,38 +1076,38 @@ def CMPLWI : DForm_6_ext<10, (outs CRRC:$dst), (ins GPRC:$src1, u16imm:$src2), let PPC970_Unit = 1 in { // FXU Operations. def NAND : XForm_6<31, 476, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "nand $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (and GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>; def AND : XForm_6<31, 28, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "and $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (and GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (and i32:$rS, i32:$rB))]>; def ANDC : XForm_6<31, 60, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "andc $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (and GPRC:$rS, (not GPRC:$rB)))]>; + [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>; def OR : XForm_6<31, 444, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "or $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (or GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (or i32:$rS, i32:$rB))]>; def NOR : XForm_6<31, 124, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "nor $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (or GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>; def ORC : XForm_6<31, 412, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "orc $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (or GPRC:$rS, (not GPRC:$rB)))]>; + [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>; def EQV : XForm_6<31, 284, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "eqv $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (not (xor GPRC:$rS, GPRC:$rB)))]>; + [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>; def XOR : XForm_6<31, 316, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "xor $rA, $rS, $rB", IntSimple, - [(set GPRC:$rA, (xor GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (xor i32:$rS, i32:$rB))]>; def SLW : XForm_6<31, 24, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "slw $rA, $rS, $rB", IntGeneral, - [(set GPRC:$rA, (PPCshl GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>; def SRW : XForm_6<31, 536, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "srw $rA, $rS, $rB", IntGeneral, - [(set GPRC:$rA, (PPCsrl GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>; let Defs = [CARRY] in { def SRAW : XForm_6<31, 792, (outs GPRC:$rA), (ins GPRC:$rS, GPRC:$rB), "sraw $rA, $rS, $rB", IntShift, - [(set GPRC:$rA, (PPCsra GPRC:$rS, GPRC:$rB))]>; + [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>; } } @@ -1072,17 +1115,17 @@ let PPC970_Unit = 1 in { // FXU Operations. let Defs = [CARRY] in { def SRAWI : XForm_10<31, 824, (outs GPRC:$rA), (ins GPRC:$rS, u5imm:$SH), "srawi $rA, $rS, $SH", IntShift, - [(set GPRC:$rA, (sra GPRC:$rS, (i32 imm:$SH)))]>; + [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>; } def CNTLZW : XForm_11<31, 26, (outs GPRC:$rA), (ins GPRC:$rS), "cntlzw $rA, $rS", IntGeneral, - [(set GPRC:$rA, (ctlz GPRC:$rS))]>; + [(set i32:$rA, (ctlz i32:$rS))]>; def EXTSB : XForm_11<31, 954, (outs GPRC:$rA), (ins GPRC:$rS), "extsb $rA, $rS", IntSimple, - [(set GPRC:$rA, (sext_inreg GPRC:$rS, i8))]>; + [(set i32:$rA, (sext_inreg i32:$rS, i8))]>; def EXTSH : XForm_11<31, 922, (outs GPRC:$rA), (ins GPRC:$rS), "extsh $rA, $rS", IntSimple, - [(set GPRC:$rA, (sext_inreg GPRC:$rS, i16))]>; + [(set i32:$rA, (sext_inreg i32:$rS, i16))]>; def CMPW : XForm_16_ext<31, 0, (outs CRRC:$crD), (ins GPRC:$rA, GPRC:$rB), "cmpw $crD, $rA, $rB", IntCompare>; @@ -1100,16 +1143,54 @@ def FCMPUD : XForm_17<63, 0, (outs CRRC:$crD), (ins F8RC:$fA, F8RC:$fB), let Uses = [RM] in { def FCTIWZ : XForm_26<63, 15, (outs F8RC:$frD), (ins F8RC:$frB), "fctiwz $frD, $frB", FPGeneral, - [(set F8RC:$frD, (PPCfctiwz F8RC:$frB))]>; + [(set f64:$frD, (PPCfctiwz f64:$frB))]>; + def FRSP : XForm_26<63, 12, (outs F4RC:$frD), (ins F8RC:$frB), "frsp $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fround F8RC:$frB))]>; + [(set f32:$frD, (fround f64:$frB))]>; + + // The frin -> nearbyint mapping is valid only in fast-math mode. + def FRIND : XForm_26<63, 392, (outs F8RC:$frD), (ins F8RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f64:$frD, (fnearbyint f64:$frB))]>; + def FRINS : XForm_26<63, 392, (outs F4RC:$frD), (ins F4RC:$frB), + "frin $frD, $frB", FPGeneral, + [(set f32:$frD, (fnearbyint f32:$frB))]>; + + // These pseudos expand to rint but also set FE_INEXACT when the result does + // not equal the argument. + let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR! + def FRINDrint : Pseudo<(outs F8RC:$frD), (ins F8RC:$frB), + "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>; + def FRINSrint : Pseudo<(outs F4RC:$frD), (ins F4RC:$frB), + "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>; + } + + def FRIPD : XForm_26<63, 456, (outs F8RC:$frD), (ins F8RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f64:$frD, (fceil f64:$frB))]>; + def FRIPS : XForm_26<63, 456, (outs F4RC:$frD), (ins F4RC:$frB), + "frip $frD, $frB", FPGeneral, + [(set f32:$frD, (fceil f32:$frB))]>; + def FRIZD : XForm_26<63, 424, (outs F8RC:$frD), (ins F8RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f64:$frD, (ftrunc f64:$frB))]>; + def FRIZS : XForm_26<63, 424, (outs F4RC:$frD), (ins F4RC:$frB), + "friz $frD, $frB", FPGeneral, + [(set f32:$frD, (ftrunc f32:$frB))]>; + def FRIMD : XForm_26<63, 488, (outs F8RC:$frD), (ins F8RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f64:$frD, (ffloor f64:$frB))]>; + def FRIMS : XForm_26<63, 488, (outs F4RC:$frD), (ins F4RC:$frB), + "frim $frD, $frB", FPGeneral, + [(set f32:$frD, (ffloor f32:$frB))]>; + def FSQRT : XForm_26<63, 22, (outs F8RC:$frD), (ins F8RC:$frB), "fsqrt $frD, $frB", FPSqrt, - [(set F8RC:$frD, (fsqrt F8RC:$frB))]>; + [(set f64:$frD, (fsqrt f64:$frB))]>; def FSQRTS : XForm_26<59, 22, (outs F4RC:$frD), (ins F4RC:$frB), "fsqrts $frD, $frB", FPSqrt, - [(set F4RC:$frD, (fsqrt F4RC:$frB))]>; + [(set f32:$frD, (fsqrt f32:$frB))]>; } } @@ -1119,29 +1200,29 @@ let Uses = [RM] in { /// sneak into a d-group with a store). def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB), "fmr $frD, $frB", FPGeneral, - []>, // (set F4RC:$frD, F4RC:$frB) + []>, // (set f32:$frD, f32:$frB) PPC970_Unit_Pseudo; let PPC970_Unit = 3 in { // FPU Operations. // These are artificially split into two different forms, for 4/8 byte FP. def FABSS : XForm_26<63, 264, (outs F4RC:$frD), (ins F4RC:$frB), "fabs $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fabs F4RC:$frB))]>; + [(set f32:$frD, (fabs f32:$frB))]>; def FABSD : XForm_26<63, 264, (outs F8RC:$frD), (ins F8RC:$frB), "fabs $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fabs F8RC:$frB))]>; + [(set f64:$frD, (fabs f64:$frB))]>; def FNABSS : XForm_26<63, 136, (outs F4RC:$frD), (ins F4RC:$frB), "fnabs $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fneg (fabs F4RC:$frB)))]>; + [(set f32:$frD, (fneg (fabs f32:$frB)))]>; def FNABSD : XForm_26<63, 136, (outs F8RC:$frD), (ins F8RC:$frB), "fnabs $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fneg (fabs F8RC:$frB)))]>; + [(set f64:$frD, (fneg (fabs f64:$frB)))]>; def FNEGS : XForm_26<63, 40, (outs F4RC:$frD), (ins F4RC:$frB), "fneg $frD, $frB", FPGeneral, - [(set F4RC:$frD, (fneg F4RC:$frB))]>; + [(set f32:$frD, (fneg f32:$frB))]>; def FNEGD : XForm_26<63, 40, (outs F8RC:$frD), (ins F8RC:$frB), "fneg $frD, $frB", FPGeneral, - [(set F8RC:$frD, (fneg F8RC:$frB))]>; + [(set f64:$frD, (fneg f64:$frB))]>; } @@ -1161,6 +1242,7 @@ def CROR : XLForm_1<19, 449, (outs CRBITRC:$CRD), "cror $CRD, $CRA, $CRB", BrCR, []>; +let isCodeGenOnly = 1 in { def CRSET : XLForm_1_ext<19, 289, (outs CRBITRC:$dst), (ins), "creqv $dst, $dst, $dst", BrCR, []>; @@ -1178,6 +1260,7 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins), "crxor 6, 6, 6", BrCR, [(PPCcr6unset)]>; } +} // XFX-Form instructions. Instructions that deal with SPRs. // @@ -1186,7 +1269,7 @@ def MFCTR : XFXForm_1_ext<31, 339, 9, (outs GPRC:$rT), (ins), "mfctr $rT", SprMFSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; } -let Defs = [CTR], Pattern = [(PPCmtctr GPRC:$rS)] in { +let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in { def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins GPRC:$rS), "mtctr $rS", SprMTSPR>, PPC970_DGroup_First, PPC970_Unit_FXU; @@ -1213,6 +1296,29 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins), "mfspr $rT, 256", IntGeneral>, PPC970_DGroup_First, PPC970_Unit_FXU; +let isCodeGenOnly = 1 in { + def MTVRSAVEv : XFXForm_7_ext<31, 467, 256, + (outs VRSAVERC:$reg), (ins GPRC:$rS), + "mtspr 256, $rS", IntGeneral>, + PPC970_DGroup_Single, PPC970_Unit_FXU; + def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), + (ins VRSAVERC:$reg), + "mfspr $rT, 256", IntGeneral>, + PPC970_DGroup_First, PPC970_Unit_FXU; +} + +// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register, +// so we'll need to scavenge a register for it. +let mayStore = 1 in +def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F), + "#SPILL_VRSAVE", []>; + +// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously +// spilled), so we'll need to scavenge a register for it. +let mayLoad = 1 in +def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F), + "#RESTORE_VRSAVE", []>; + def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), "mtcrf $FXM, $rS", BrMCRX>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1227,6 +1333,7 @@ def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS), // instruction to keep the register allocator from becoming confused. // // FIXME: Make this a real Pseudo instruction when the JIT switches to MC. +let isCodeGenOnly = 1 in def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "#MFCRpseud", SprMFCR>, PPC970_MicroCode, PPC970_Unit_CRU; @@ -1239,38 +1346,29 @@ def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM), "mfocrf $rT, $FXM", SprMFCR>, PPC970_DGroup_First, PPC970_Unit_CRU; -// Instructions to manipulate FPSCR. Only long double handling uses these. -// FPSCR is not modelled; we use the SDNode Flag to keep things in order. +// Pseudo instruction to perform FADD in round-to-zero mode. +let usesCustomInserter = 1, Uses = [RM] in { + def FADDrtz: Pseudo<(outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "", + [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>; +} +// The above pseudo gets expanded to make use of the following instructions +// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level. let Uses = [RM], Defs = [RM] in { def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM), - "mtfsb0 $FM", IntMTFSB0, - [(PPCmtfsb0 (i32 imm:$FM))]>, + "mtfsb0 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM), - "mtfsb1 $FM", IntMTFSB0, - [(PPCmtfsb1 (i32 imm:$FM))]>, + "mtfsb1 $FM", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; - // MTFSF does not actually produce an FP result. We pretend it copies - // input reg B to the output. If we didn't do this it would look like the - // instruction had no outputs (because we aren't modelling the FPSCR) and - // it would be deleted. - def MTFSF : XFLForm<63, 711, (outs F8RC:$FRA), - (ins i32imm:$FM, F8RC:$rT, F8RC:$FRB), - "mtfsf $FM, $rT", "$FRB = $FRA", IntMTFSB0, - [(set F8RC:$FRA, (PPCmtfsf (i32 imm:$FM), - F8RC:$rT, F8RC:$FRB))]>, + def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, F8RC:$rT), + "mtfsf $FM, $rT", IntMTFSB0, []>, PPC970_DGroup_Single, PPC970_Unit_FPU; } let Uses = [RM] in { def MFFS : XForm_42<63, 583, (outs F8RC:$rT), (ins), "mffs $rT", IntMFFS, - [(set F8RC:$rT, (PPCmffs))]>, - PPC970_DGroup_Single, PPC970_Unit_FPU; - def FADDrtz: AForm_2<63, 21, - (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), - "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (PPCfaddrtz F8RC:$FRA, F8RC:$FRB))]>, + [(set f64:$rT, (PPCmffs))]>, PPC970_DGroup_Single, PPC970_Unit_FPU; } @@ -1281,61 +1379,61 @@ let PPC970_Unit = 1 in { // FXU Operations. // def ADD4 : XOForm_1<31, 266, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "add $rT, $rA, $rB", IntSimple, - [(set GPRC:$rT, (add GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (add i32:$rA, i32:$rB))]>; let Defs = [CARRY] in { def ADDC : XOForm_1<31, 10, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "addc $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (addc GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (addc i32:$rA, i32:$rB))]>, PPC970_DGroup_Cracked; } def DIVW : XOForm_1<31, 491, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divw $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (sdiv GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; def DIVWU : XOForm_1<31, 459, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "divwu $rT, $rA, $rB", IntDivW, - [(set GPRC:$rT, (udiv GPRC:$rA, GPRC:$rB))]>, + [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>, PPC970_DGroup_First, PPC970_DGroup_Cracked; def MULHW : XOForm_1<31, 75, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mulhw $rT, $rA, $rB", IntMulHW, - [(set GPRC:$rT, (mulhs GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>; def MULHWU : XOForm_1<31, 11, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mulhwu $rT, $rA, $rB", IntMulHWU, - [(set GPRC:$rT, (mulhu GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>; def MULLW : XOForm_1<31, 235, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "mullw $rT, $rA, $rB", IntMulHW, - [(set GPRC:$rT, (mul GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (mul i32:$rA, i32:$rB))]>; def SUBF : XOForm_1<31, 40, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subf $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sub GPRC:$rB, GPRC:$rA))]>; + [(set i32:$rT, (sub i32:$rB, i32:$rA))]>; let Defs = [CARRY] in { def SUBFC : XOForm_1<31, 8, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfc $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (subc GPRC:$rB, GPRC:$rA))]>, + [(set i32:$rT, (subc i32:$rB, i32:$rA))]>, PPC970_DGroup_Cracked; } def NEG : XOForm_3<31, 104, 0, (outs GPRC:$rT), (ins GPRC:$rA), "neg $rT, $rA", IntSimple, - [(set GPRC:$rT, (ineg GPRC:$rA))]>; + [(set i32:$rT, (ineg i32:$rA))]>; let Uses = [CARRY], Defs = [CARRY] in { def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "adde $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>; + [(set i32:$rT, (adde i32:$rA, i32:$rB))]>; def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, -1))]>; + [(set i32:$rT, (adde i32:$rA, -1))]>; def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA), "addze $rT, $rA", IntGeneral, - [(set GPRC:$rT, (adde GPRC:$rA, 0))]>; + [(set i32:$rT, (adde i32:$rA, 0))]>; def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB), "subfe $rT, $rA, $rB", IntGeneral, - [(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>; + [(set i32:$rT, (sube i32:$rB, i32:$rA))]>; def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfme $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube -1, GPRC:$rA))]>; + [(set i32:$rT, (sube -1, i32:$rA))]>; def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA), "subfze $rT, $rA", IntGeneral, - [(set GPRC:$rT, (sube 0, GPRC:$rA))]>; + [(set i32:$rT, (sube 0, i32:$rA))]>; } } @@ -1347,43 +1445,41 @@ let Uses = [RM] in { def FMADD : AForm_1<63, 29, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB))]>; + [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>; def FMADDS : AForm_1<59, 29, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB))]>; + [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>; def FMSUB : AForm_1<63, 28, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fma F8RC:$FRA, F8RC:$FRC, (fneg F8RC:$FRB)))]>; + [(set f64:$FRT, + (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>; def FMSUBS : AForm_1<59, 28, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fma F4RC:$FRA, F4RC:$FRC, (fneg F4RC:$FRB)))]>; + [(set f32:$FRT, + (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>; def FNMADD : AForm_1<63, 31, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, - (fneg (fma F8RC:$FRA, F8RC:$FRC, F8RC:$FRB)))]>; + [(set f64:$FRT, + (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>; def FNMADDS : AForm_1<59, 31, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, - (fneg (fma F4RC:$FRA, F4RC:$FRC, F4RC:$FRB)))]>; + [(set f32:$FRT, + (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>; def FNMSUB : AForm_1<63, 30, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, - [(set F8RC:$FRT, (fneg (fma F8RC:$FRA, F8RC:$FRC, - (fneg F8RC:$FRB))))]>; + [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC, + (fneg f64:$FRB))))]>; def FNMSUBS : AForm_1<59, 30, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (fneg (fma F4RC:$FRA, F4RC:$FRC, - (fneg F4RC:$FRB))))]>; + [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC, + (fneg f32:$FRB))))]>; } // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid // having 4 of these, force the comparison to always be an 8-byte double (code @@ -1392,50 +1488,50 @@ let Uses = [RM] in { def FSELD : AForm_1<63, 23, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F8RC:$FRT, (PPCfsel F8RC:$FRA,F8RC:$FRC,F8RC:$FRB))]>; + [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>; def FSELS : AForm_1<63, 23, (outs F4RC:$FRT), (ins F8RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fsel $FRT, $FRA, $FRC, $FRB", FPGeneral, - [(set F4RC:$FRT, (PPCfsel F8RC:$FRA,F4RC:$FRC,F4RC:$FRB))]>; + [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>; let Uses = [RM] in { def FADD : AForm_2<63, 21, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fadd $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (fadd F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>; def FADDS : AForm_2<59, 21, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fadds $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>; def FDIV : AForm_2<63, 18, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fdiv $FRT, $FRA, $FRB", FPDivD, - [(set F8RC:$FRT, (fdiv F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>; def FDIVS : AForm_2<59, 18, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fdivs $FRT, $FRA, $FRB", FPDivS, - [(set F4RC:$FRT, (fdiv F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>; def FMUL : AForm_3<63, 25, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRC), "fmul $FRT, $FRA, $FRC", FPFused, - [(set F8RC:$FRT, (fmul F8RC:$FRA, F8RC:$FRC))]>; + [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>; def FMULS : AForm_3<59, 25, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRC), "fmuls $FRT, $FRA, $FRC", FPGeneral, - [(set F4RC:$FRT, (fmul F4RC:$FRA, F4RC:$FRC))]>; + [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>; def FSUB : AForm_2<63, 20, (outs F8RC:$FRT), (ins F8RC:$FRA, F8RC:$FRB), "fsub $FRT, $FRA, $FRB", FPAddSub, - [(set F8RC:$FRT, (fsub F8RC:$FRA, F8RC:$FRB))]>; + [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>; def FSUBS : AForm_2<59, 20, (outs F4RC:$FRT), (ins F4RC:$FRA, F4RC:$FRB), "fsubs $FRT, $FRA, $FRB", FPGeneral, - [(set F4RC:$FRT, (fsub F4RC:$FRA, F4RC:$FRB))]>; + [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>; } } let PPC970_Unit = 1 in { // FXU Operations. def ISEL : AForm_4<31, 15, - (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB, pred:$cond), + (outs GPRC:$rT), (ins GPRC_NOR0:$rA, GPRC:$rB, CRBITRC:$cond), "isel $rT, $rA, $rB, $cond", IntGeneral, []>; } @@ -1475,47 +1571,43 @@ def : Pat<(i32 imm:$imm), (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; // Implement the 'not' operation with the NOR instruction. -def NOT : Pat<(not GPRC:$in), - (NOR GPRC:$in, GPRC:$in)>; +def NOT : Pat<(not i32:$in), + (NOR $in, $in)>; // ADD an arbitrary immediate. -def : Pat<(add GPRC:$in, imm:$imm), - (ADDIS (ADDI GPRC:$in, (LO16 imm:$imm)), (HA16 imm:$imm))>; +def : Pat<(add i32:$in, imm:$imm), + (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>; // OR an arbitrary immediate. -def : Pat<(or GPRC:$in, imm:$imm), - (ORIS (ORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +def : Pat<(or i32:$in, imm:$imm), + (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // XOR an arbitrary immediate. -def : Pat<(xor GPRC:$in, imm:$imm), - (XORIS (XORI GPRC:$in, (LO16 imm:$imm)), (HI16 imm:$imm))>; +def : Pat<(xor i32:$in, imm:$imm), + (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>; // SUBFIC -def : Pat<(sub immSExt16:$imm, GPRC:$in), - (SUBFIC GPRC:$in, imm:$imm)>; +def : Pat<(sub immSExt16:$imm, i32:$in), + (SUBFIC $in, imm:$imm)>; // SHL/SRL -def : Pat<(shl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, imm:$imm, 0, (SHL32 imm:$imm))>; -def : Pat<(srl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, (SRL32 imm:$imm), imm:$imm, 31)>; +def : Pat<(shl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>; +def : Pat<(srl i32:$in, (i32 imm:$imm)), + (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>; // ROTL -def : Pat<(rotl GPRC:$in, GPRC:$sh), - (RLWNM GPRC:$in, GPRC:$sh, 0, 31)>; -def : Pat<(rotl GPRC:$in, (i32 imm:$imm)), - (RLWINM GPRC:$in, imm:$imm, 0, 31)>; +def : Pat<(rotl i32:$in, i32:$sh), + (RLWNM $in, $sh, 0, 31)>; +def : Pat<(rotl i32:$in, (i32 imm:$imm)), + (RLWINM $in, imm:$imm, 0, 31)>; // RLWNM -def : Pat<(and (rotl GPRC:$in, GPRC:$sh), maskimm32:$imm), - (RLWNM GPRC:$in, GPRC:$sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; +def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm), + (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>; // Calls -def : Pat<(PPCcall_Darwin (i32 tglobaladdr:$dst)), - (BL_Darwin tglobaladdr:$dst)>; -def : Pat<(PPCcall_Darwin (i32 texternalsym:$dst)), - (BL_Darwin texternalsym:$dst)>; -def : Pat<(PPCcall_SVR4 (i32 tglobaladdr:$dst)), - (BL_SVR4 tglobaladdr:$dst)>; -def : Pat<(PPCcall_SVR4 (i32 texternalsym:$dst)), - (BL_SVR4 texternalsym:$dst)>; +def : Pat<(PPCcall (i32 tglobaladdr:$dst)), + (BL tglobaladdr:$dst)>; +def : Pat<(PPCcall (i32 texternalsym:$dst)), + (BL texternalsym:$dst)>; def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm), @@ -1538,28 +1630,28 @@ def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>; def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>; def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>; def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>; -def : Pat<(PPChi tglobaltlsaddr:$g, GPRC:$in), - (ADDIS GPRC:$in, tglobaltlsaddr:$g)>; -def : Pat<(PPClo tglobaltlsaddr:$g, GPRC:$in), - (ADDIL GPRC:$in, tglobaltlsaddr:$g)>; -def : Pat<(add GPRC:$in, (PPChi tglobaladdr:$g, 0)), - (ADDIS GPRC:$in, tglobaladdr:$g)>; -def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), - (ADDIS GPRC:$in, tconstpool:$g)>; -def : Pat<(add GPRC:$in, (PPChi tjumptable:$g, 0)), - (ADDIS GPRC:$in, tjumptable:$g)>; -def : Pat<(add GPRC:$in, (PPChi tblockaddress:$g, 0)), - (ADDIS GPRC:$in, tblockaddress:$g)>; +def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in), + (ADDIS $in, tglobaltlsaddr:$g)>; +def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in), + (ADDI $in, tglobaltlsaddr:$g)>; +def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)), + (ADDIS $in, tglobaladdr:$g)>; +def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)), + (ADDIS $in, tconstpool:$g)>; +def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)), + (ADDIS $in, tjumptable:$g)>; +def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)), + (ADDIS $in, tblockaddress:$g)>; // Standard shifts. These are represented separately from the real shifts above // so that we can distinguish between shifts that allow 5-bit and 6-bit shift // amounts. -def : Pat<(sra GPRC:$rS, GPRC:$rB), - (SRAW GPRC:$rS, GPRC:$rB)>; -def : Pat<(srl GPRC:$rS, GPRC:$rB), - (SRW GPRC:$rS, GPRC:$rB)>; -def : Pat<(shl GPRC:$rS, GPRC:$rB), - (SLW GPRC:$rS, GPRC:$rB)>; +def : Pat<(sra i32:$rS, i32:$rB), + (SRAW $rS, $rB)>; +def : Pat<(srl i32:$rS, i32:$rB), + (SRW $rS, $rB)>; +def : Pat<(shl i32:$rS, i32:$rB), + (SLW $rS, $rB)>; def : Pat<(zextloadi1 iaddr:$src), (LBZ iaddr:$src)>; @@ -1582,8 +1674,8 @@ def : Pat<(f64 (extloadf32 iaddr:$src)), def : Pat<(f64 (extloadf32 xaddr:$src)), (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>; -def : Pat<(f64 (fextend F4RC:$src)), - (COPY_TO_REGCLASS F4RC:$src, F8RC)>; +def : Pat<(f64 (fextend f32:$src)), + (COPY_TO_REGCLASS $src, F8RC)>; // Memory barriers def : Pat<(membarrier (i32 imm /*ll*/), diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index 045b375dd8..ee18eadf6e 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -37,9 +37,19 @@ class PPCFunctionInfo : public MachineFunctionInfo { /// PEI. bool MustSaveLR; + /// Does this function have any stack spills. + bool HasSpills; + + /// Does this function spill using instructions with only r+r (not r+i) + /// forms. + bool HasNonRISpills; + /// SpillsCR - Indicates whether CR is spilled in the current function. bool SpillsCR; + /// Indicates whether VRSAVE is spilled in the current function. + bool SpillsVRSAVE; + /// LRStoreRequired - The bool indicates whether there is some explicit use of /// the LR/LR8 stack slot that is not obvious from scanning the code. This /// requires that the code generator produce a store of LR to the stack on @@ -78,7 +88,10 @@ public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), ReturnAddrSaveIndex(0), + HasSpills(false), + HasNonRISpills(false), SpillsCR(false), + SpillsVRSAVE(false), LRStoreRequired(false), MinReservedArea(0), TailCallSPDelta(0), @@ -109,9 +122,18 @@ public: void setMustSaveLR(bool U) { MustSaveLR = U; } bool mustSaveLR() const { return MustSaveLR; } + void setHasSpills() { HasSpills = true; } + bool hasSpills() const { return HasSpills; } + + void setHasNonRISpills() { HasNonRISpills = true; } + bool hasNonRISpills() const { return HasNonRISpills; } + void setSpillsCR() { SpillsCR = true; } bool isCRSpilled() const { return SpillsCR; } + void setSpillsVRSAVE() { SpillsVRSAVE = true; } + bool isVRSAVESpilled() const { return SpillsVRSAVE; } + void setLRStoreRequired() { LRStoreRequired = true; } bool isLRStoreRequired() const { return LRStoreRequired; } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index df245cc655..1d61a3a8ea 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -46,26 +46,8 @@ #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -namespace llvm { -cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger", - cl::init(false), - cl::desc("Disable PPC32 register scavenger"), - cl::Hidden); -cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger", - cl::init(false), - cl::desc("Disable PPC64 register scavenger"), - cl::Hidden); -} - using namespace llvm; -// FIXME (64-bit): Should be inlined. -bool -PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const { - return ((!DisablePPC32RS && !Subtarget.isPPC64()) || - (!DisablePPC64RS && Subtarget.isPPC64())); -} - PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, const TargetInstrInfo &tii) : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR, @@ -86,20 +68,20 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST, ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8; ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; - ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; ImmToIdxMap[PPC::STD_32] = PPC::STDX_32; -} - -bool -PPCRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { - return requiresRegisterScavenging(MF); + ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; } - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) const { + if (Kind == 1) { + if (Subtarget.isPPC64()) + return &PPC::G8RC_NOX0RegClass; + return &PPC::GPRC_NOR0RegClass; + } + if (Subtarget.isPPC64()) return &PPC::G8RCRegClass; return &PPC::GPRCRegClass; @@ -123,12 +105,35 @@ PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask; } +const uint32_t* +PPCRegisterInfo::getNoPreservedMask() const { + // The naming here is inverted: The CSR_NoRegs_Altivec has the + // Altivec registers masked so that they're not saved and restored around + // instructions with this preserved mask. + + if (!Subtarget.hasAltivec()) + return CSR_NoRegs_Altivec_RegMask; + + if (Subtarget.isDarwin()) + return CSR_NoRegs_Darwin_RegMask; + return CSR_NoRegs_RegMask; +} + BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const PPCFrameLowering *PPCFI = static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering()); - Reserved.set(PPC::R0); + // The ZERO register is not really a register, but the representation of r0 + // when used in instructions that treat r0 as the constant 0. + Reserved.set(PPC::ZERO); + Reserved.set(PPC::ZERO8); + + // The FP register is also not really a register, but is the representation + // of the frame pointer register used by ISD::FRAMEADDR. + Reserved.set(PPC::FP); + Reserved.set(PPC::FP8); + Reserved.set(PPC::R1); Reserved.set(PPC::LR); Reserved.set(PPC::LR8); @@ -139,35 +144,21 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - // Reserve R2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::R2); - } // On PPC64, r13 is the thread pointer. Never allocate this register. - // Note that this is over conservative, as it also prevents allocation of R31 - // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); - Reserved.set(PPC::R31); - Reserved.set(PPC::X0); Reserved.set(PPC::X1); Reserved.set(PPC::X13); - Reserved.set(PPC::X31); + + if (PPCFI->needsFP(MF)) + Reserved.set(PPC::X31); // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. if (Subtarget.isSVR4ABI()) { Reserved.set(PPC::X2); } - // Reserve X2 on Darwin to hack around the problem of save/restore of CR - // when the stack frame is too big to address directly; we need two regs. - // This is a hack. - if (Subtarget.isDarwinABI()) { - Reserved.set(PPC::X2); - } } if (PPCFI->needsFP(MF)) @@ -185,6 +176,8 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, switch (RC->getID()) { default: return 0; + case PPC::G8RC_NOX0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -199,38 +192,10 @@ PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -bool -PPCRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { - switch (RC->getID()) { - case PPC::G8RCRegClassID: - case PPC::GPRCRegClassID: - case PPC::F8RCRegClassID: - case PPC::F4RCRegClassID: - case PPC::VRRCRegClassID: - return true; - default: - return false; - } -} - //===----------------------------------------------------------------------===// // Stack Frame Processing methods //===----------------------------------------------------------------------===// -/// findScratchRegister - Find a 'free' PPC register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, - const TargetRegisterClass *RC, int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - // FIXME: move ARM callee-saved reg scan to target independent code, then - // search for already spilled CS register here. - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - /// lowerDynamicAlloc - Generate the code for allocating an object in the /// current frame. The sequence of code with be in the general form /// @@ -238,8 +203,7 @@ unsigned findScratchRegister(MachineBasicBlock::iterator II, RegScavenger *RS, /// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size /// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation /// -void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { +void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Get the instruction. MachineInstr &MI = *II; // Get the instruction's basic block. @@ -271,28 +235,16 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - const TargetRegisterClass *RC = LP64 ? G8RC : GPRC; - - // FIXME (64-bit): Use "findScratchRegister" - unsigned Reg; - if (requiresRegisterScavenging(MF)) - Reg = findScratchRegister(II, RS, RC, SPAdj); - else - Reg = PPC::R0; + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) .addImm(FrameSize); } else if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) - .addImm(0) - .addReg(PPC::X1); - else - BuildMI(MBB, II, dl, TII.get(PPC::LD), PPC::X0) - .addImm(0) - .addReg(PPC::X1); + BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg) + .addImm(0) + .addReg(PPC::X1); } else { BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg) .addImm(0) @@ -302,17 +254,10 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, // Grow the stack and update the stack pointer link, then determine the // address of new allocated space. if (LP64) { - if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part. - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(Reg, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - else - BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) - .addReg(PPC::X0, RegState::Kill) - .addReg(PPC::X1) - .addReg(MI.getOperand(1).getReg()); - + BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1) + .addReg(Reg, RegState::Kill) + .addReg(PPC::X1) + .addReg(MI.getOperand(1).getReg()); if (!MI.getOperand(1).isKill()) BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg()) .addReg(PPC::X1) @@ -354,23 +299,19 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II, /// stw rA, FI ; Store rA to the frame. /// void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, - unsigned FrameIndex, int SPAdj, - RegScavenger *RS) const { + unsigned FrameIndex) const { // Get the instruction. MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); DebugLoc dl = MI.getDebugLoc(); - // FIXME: Once LLVM supports creating virtual registers here, or the register - // scavenger can return multiple registers, stop using reserved registers - // here. - (void) SPAdj; - (void) RS; - bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned SrcReg = MI.getOperand(0).getReg(); // We need to store the CR in the low 4-bits of the saved value. First, issue @@ -380,16 +321,20 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. - if (SrcReg != PPC::CR0) + if (SrcReg != PPC::CR0) { + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + // rlwinm rA, rA, ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg, RegState::Kill) - .addImm(getPPCRegisterNumbering(SrcReg) * 4) + .addReg(Reg1, RegState::Kill) + .addImm(getEncodingValue(SrcReg) * 4) .addImm(0) .addImm(31); + } addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW)) - .addReg(Reg, getKillRegState(MI.getOperand(1).getImm())), + .addReg(Reg, RegState::Kill), FrameIndex); // Discard the pseudo instruction. @@ -397,23 +342,19 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, } void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, - unsigned FrameIndex, int SPAdj, - RegScavenger *RS) const { + unsigned FrameIndex) const { // Get the instruction. MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset> // Get the instruction's basic block. MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); DebugLoc dl = MI.getDebugLoc(); - // FIXME: Once LLVM supports creating virtual registers here, or the register - // scavenger can return multiple registers, stop using reserved registers - // here. - (void) SPAdj; - (void) RS; - bool LP64 = Subtarget.isPPC64(); - unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) : - (LP64 ? PPC::X0 : PPC::R0); + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + + unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); unsigned DestReg = MI.getOperand(0).getReg(); assert(MI.definesRegister(DestReg) && "RESTORE_CR does not define its destination"); @@ -424,15 +365,67 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II, // If the reloaded register isn't CR0, shift the bits right so that they are // in the right CR's slot. if (DestReg != PPC::CR0) { - unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4; + unsigned Reg1 = Reg; + Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); + + unsigned ShiftBits = getEncodingValue(DestReg)*4; // rlwinm r11, r11, 32-ShiftBits, 0, 31. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg) - .addReg(Reg).addImm(32-ShiftBits).addImm(0) + .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0) .addImm(31); } BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg) - .addReg(Reg); + .addReg(Reg, RegState::Kill); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); + unsigned SrcReg = MI.getOperand(0).getReg(); + + BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) + .addReg(Reg, RegState::Kill), + FrameIndex); + + // Discard the pseudo instruction. + MBB.erase(II); +} + +void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + // Get the instruction. + MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset> + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + DebugLoc dl = MI.getDebugLoc(); + + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_VRSAVE does not define its destination"); + + addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ), + Reg), FrameIndex); + + BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg) + .addReg(Reg, RegState::Kill); // Discard the pseudo instruction. MBB.erase(II); @@ -494,19 +487,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Special case for dynamic alloca. if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { - lowerDynamicAlloc(II, SPAdj, RS); + lowerDynamicAlloc(II); return; } - // Special case for pseudo-ops SPILL_CR and RESTORE_CR. - if (requiresRegisterScavenging(MF)) { - if (OpC == PPC::SPILL_CR) { - lowerCRSpilling(II, FrameIndex, SPAdj, RS); - return; - } else if (OpC == PPC::RESTORE_CR) { - lowerCRRestore(II, FrameIndex, SPAdj, RS); - return; - } + // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc. + if (OpC == PPC::SPILL_CR) { + lowerCRSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_CR) { + lowerCRRestore(II, FrameIndex); + return; + } else if (OpC == PPC::SPILL_VRSAVE) { + lowerVRSAVESpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_VRSAVE) { + lowerVRSAVERestore(II, FrameIndex); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP). @@ -525,11 +522,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case PPC::LWA: case PPC::LD: case PPC::STD: - case PPC::STD_32: isIXAddr = true; break; } - + + // If the instruction is not present in ImmToIdxMap, then it has no immediate + // form (and must be r+r). + bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC); + // Now add the frame object offset to the offset from r1. int Offset = MFI->getObjectOffset(FrameIndex); if (!isIXAddr) @@ -553,7 +553,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // only "std" to a stack slot that is at least 4-byte aligned, but it can // happen in invalid code. if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm - (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { + (!noImmForm && + isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) { if (isIXAddr) Offset >>= 2; // The actual encoded value has the low two bits zero. MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); @@ -563,19 +564,17 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // The offset doesn't fit into a single register, scavenge one to build the // offset in. - unsigned SReg; - if (requiresRegisterScavenging(MF)) { - const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; - SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj); - } else - SReg = is64Bit ? PPC::X0 : PPC::R0; + const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; + const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; + const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC; + unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC), + SReg = MF.getRegInfo().createVirtualRegister(RC); // Insert a set of rA with the full offset value before the ld, st, or add - BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg) + BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi) .addImm(Offset >> 16); BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg) - .addReg(SReg, RegState::Kill) + .addReg(SRegHi, RegState::Kill) .addImm(Offset); // Convert into indexed form of the instruction: @@ -584,7 +583,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; - if (OpC != TargetOpcode::INLINEASM) { + if (noImmForm) + OperandBase = 1; + else if (OpC != TargetOpcode::INLINEASM) { assert(ImmToIdxMap.count(OpC) && "No indexed form of load or store available!"); unsigned NewOpcode = ImmToIdxMap.find(OpC)->second; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 9840666242..7e6683eeb2 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -15,8 +15,8 @@ #ifndef POWERPC32_REGISTERINFO_H #define POWERPC32_REGISTERINFO_H +#include "llvm/ADT/DenseMap.h" #include "PPC.h" -#include <map> #define GET_REGINFO_HEADER #include "PPCGenRegisterInfo.inc" @@ -27,7 +27,7 @@ class TargetInstrInfo; class Type; class PPCRegisterInfo : public PPCGenRegisterInfo { - std::map<unsigned, unsigned> ImmToIdxMap; + DenseMap<unsigned, unsigned> ImmToIdxMap; const PPCSubtarget &Subtarget; const TargetInstrInfo &TII; public: @@ -44,23 +44,33 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const uint32_t *getNoPreservedMask() const; BitVector getReservedRegs(const MachineFunction &MF) const; - virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + /// We require the register scavenger. + bool requiresRegisterScavenging(const MachineFunction &MF) const { + return true; + } + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + return true; + } + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + return true; + } + + void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; + void lowerCRSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerCRRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerVRSAVESpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerVRSAVERestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; - /// requiresRegisterScavenging - We require a register scavenger. - /// FIXME (64-bit): Should be inlined. - bool requiresRegisterScavenging(const MachineFunction &MF) const; - - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; - - void lowerDynamicAlloc(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const; - void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex, - int SPAdj, RegScavenger *RS) const; - void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex, - int SPAdj, RegScavenger *RS) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const; void eliminateFrameIndex(MachineBasicBlock::iterator II, diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index 8ee9b1ec9f..57a25f5143 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -27,40 +27,40 @@ class PPCReg<string n> : Register<n> { // GPR - One of the 32 32-bit general-purpose registers class GPR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // GP8 - One of the 32 64-bit general-purpose registers class GP8<GPR SubReg, string n> : PPCReg<n> { - field bits<5> Num = SubReg.Num; + let HWEncoding = SubReg.HWEncoding; let SubRegs = [SubReg]; let SubRegIndices = [sub_32]; } // SPR - One of the 32-bit special-purpose registers class SPR<bits<10> num, string n> : PPCReg<n> { - field bits<10> Num = num; + let HWEncoding{9-0} = num; } // FPR - One of the 32 64-bit floating-point registers class FPR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // VR - One of the 32 128-bit vector registers class VR<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // CR - One of the 8 4-bit condition registers class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> { - field bits<3> Num = num; + let HWEncoding{2-0} = num; let SubRegs = subregs; } // CRBIT - One of the 32 1-bit condition register fields class CRBIT<bits<5> num, string n> : PPCReg<n> { - field bits<5> Num = num; + let HWEncoding{4-0} = num; } // General-purpose registers @@ -86,6 +86,14 @@ foreach Index = 0-31 in { DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>; } +// The reprsentation of r0 when treated as the constant 0. +def ZERO : GPR<0, "0">; +def ZERO8 : GP8<ZERO, "0">; + +// Representations of the frame pointer used by ISD::FRAMEADDR. +def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">; +def FP8 : GP8<FP, "**FRAME POINTER**">; + // Condition register bits def CR0LT : CRBIT< 0, "0">; def CR0GT : CRBIT< 1, "1">; @@ -164,11 +172,17 @@ def RM: SPR<512, "**ROUNDING MODE**">; // then nonvolatiles in reverse order since stmw/lmw save from rN to r31 def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12), (sequence "R%u", 30, 13), - R31, R0, R1, LR)>; + R31, R0, R1, FP)>; def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12), (sequence "X%u", 30, 14), - X31, X13, X0, X1, LR8)>; + X31, X13, X0, X1, FP8)>; + +// For some instructions r0 is special (representing the value 0 instead of +// the value in the r0 register), and we use these register subclasses to +// prevent r0 from being allocated for use by those instructions. +def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>; +def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>; // Allocate volatiles first, then non-volatiles in reverse order. With the SVR4 // ABI the size of the Floating-point register save area is determined by the diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 18e4c07942..1b7150fd37 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -39,7 +39,12 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, , HasQPX(false) , HasFSQRT(false) , HasSTFIWX(false) + , HasLFIWAX(false) + , HasFPRND(false) + , HasFPCVT(false) , HasISEL(false) + , HasPOPCNTD(false) + , HasLDBRX(false) , IsBookE(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 15885bd2df..d5dfa1e8e1 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -78,7 +78,12 @@ protected: bool HasQPX; bool HasFSQRT; bool HasSTFIWX; + bool HasLFIWAX; + bool HasFPRND; + bool HasFPCVT; bool HasISEL; + bool HasPOPCNTD; + bool HasLDBRX; bool IsBookE; bool HasLazyResolverStubs; bool IsJITCodeModel; @@ -155,10 +160,15 @@ public: // Specific obvious features. bool hasFSQRT() const { return HasFSQRT; } bool hasSTFIWX() const { return HasSTFIWX; } + bool hasLFIWAX() const { return HasLFIWAX; } + bool hasFPRND() const { return HasFPRND; } + bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasQPX() const { return HasQPX; } bool hasMFOCRF() const { return HasMFOCRF; } bool hasISEL() const { return HasISEL; } + bool hasPOPCNTD() const { return HasPOPCNTD; } + bool hasLDBRX() const { return HasLDBRX; } bool isBookE() const { return IsBookE; } const Triple &getTargetTriple() const { return TargetTriple; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 5e9ad347d3..00037edafc 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -122,9 +122,8 @@ llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - // FIXME: PPC currently does not have custom popcnt lowering even though - // there is hardware support. Once this is fixed, update this function - // to reflect the real capabilities of the hardware. + if (ST->hasPOPCNTD() && TyWidth <= 64) + return PSK_FastHardware; return PSK_Software; } diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt index b6763aa738..9a7902a82d 100644 --- a/lib/Target/PowerPC/README.txt +++ b/lib/Target/PowerPC/README.txt @@ -1,7 +1,6 @@ //===- README.txt - Notes for improving PowerPC-specific code gen ---------===// TODO: -* gpr0 allocation * lmw/stmw pass a la arm load store optimizer for prolog/epilog ===-------------------------------------------------------------------------=== diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index e099a9fc31..0b01433cc9 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -23,6 +23,8 @@ class AMDGPUTargetMachine; // R600 Passes FunctionPass* createR600KernelParametersPass(const DataLayout *TD); FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm); +FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm); +FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm); // SI Passes FunctionPass *createSIAnnotateControlFlowPass(); diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 5995b6f5e8..a266df535d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -60,6 +60,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v4f32, Promote); AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index a59c775272..e740348717 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -202,8 +202,8 @@ class Vector2_Build <ValueType vecType, RegisterClass vectorClass, (vecType (IMPLICIT_DEF)), elemClass:$sub0, sub0), elemClass:$sub1, sub1) >; -class Vector_Build <ValueType vecType, RegisterClass vectorClass, - ValueType elemType, RegisterClass elemClass> : Pat < +class Vector4_Build <ValueType vecType, RegisterClass vectorClass, + ValueType elemType, RegisterClass elemClass> : Pat < (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y), (elemType elemClass:$z), (elemType elemClass:$w))), (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG diff --git a/lib/Target/R600/AMDGPUMachineFunction.cpp b/lib/Target/R600/AMDGPUMachineFunction.cpp new file mode 100644 index 0000000000..0223ec8e4f --- /dev/null +++ b/lib/Target/R600/AMDGPUMachineFunction.cpp @@ -0,0 +1,22 @@ +#include "AMDGPUMachineFunction.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" + +namespace llvm { + +const char *AMDGPUMachineFunction::ShaderTypeAttribute = "ShaderType"; + +AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : + MachineFunctionInfo() { + AttributeSet Set = MF.getFunction()->getAttributes(); + Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, + ShaderTypeAttribute); + + if (A.isStringAttribute()) { + StringRef Str = A.getValueAsString(); + if (Str.getAsInteger(0, ShaderType)) + llvm_unreachable("Can't parse shader type!"); + } +} + +} diff --git a/lib/Target/R600/AMDGPUMachineFunction.h b/lib/Target/R600/AMDGPUMachineFunction.h new file mode 100644 index 0000000000..21c8c51dae --- /dev/null +++ b/lib/Target/R600/AMDGPUMachineFunction.h @@ -0,0 +1,29 @@ +//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef AMDGPUMACHINEFUNCTION_H +#define AMDGPUMACHINEFUNCTION_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +class AMDGPUMachineFunction : public MachineFunctionInfo { +private: + static const char *ShaderTypeAttribute; +public: + AMDGPUMachineFunction(const MachineFunction &MF); + unsigned ShaderType; +}; + +} +#endif // AMDGPUMACHINEFUNCTION_H diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp index b723433c16..dea43b874c 100644 --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp @@ -17,6 +17,7 @@ #include "AMDGPU.h" #include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPass.h" @@ -40,13 +41,14 @@ typedef SmallVector<BBValuePair, 2> BBValueVector; typedef SmallPtrSet<BasicBlock *, 8> BBSet; -typedef DenseMap<PHINode *, BBValueVector> PhiMap; +typedef MapVector<PHINode *, BBValueVector> PhiMap; +typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap; + typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap; typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; typedef DenseMap<BasicBlock *, Value *> BBPredicates; typedef DenseMap<BasicBlock *, BBPredicates> PredMap; typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap; -typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap; // The name for newly created blocks. diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 0185747544..e7ea876e2a 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -151,7 +151,9 @@ bool AMDGPUPassConfig::addPreEmitPass() { if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) { addPass(createAMDGPUCFGPreparationPass(*TM)); addPass(createAMDGPUCFGStructurizerPass(*TM)); + addPass(createR600EmitClauseMarkers(*TM)); addPass(createR600ExpandSpecialInstrsPass(*TM)); + addPass(createR600ControlFlowFinalizer(*TM)); addPass(&FinalizeMachineBundlesID); } else { addPass(createSILowerControlFlowPass(*TM)); diff --git a/lib/Target/R600/AMDILISelDAGToDAG.cpp b/lib/Target/R600/AMDILISelDAGToDAG.cpp index 0c7880d232..fa8f62de9c 100644 --- a/lib/Target/R600/AMDILISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDILISelDAGToDAG.cpp @@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode, SDValue Operand = Ops[OperandIdx[i] - 1]; switch (Operand.getOpcode()) { case AMDGPUISD::CONST_ADDRESS: { - if (i == 2) - break; SDValue CstOffset; - if (!Operand.getValueType().isVector() && - SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) { - Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); - Ops[SelIdx[i] - 1] = CstOffset; - return true; + if (Operand.getValueType().isVector() || + !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) + break; + + // Gather others constants values + std::vector<unsigned> Consts; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = OperandIdx[j]; + if (SrcIdx < 0) + break; + if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) { + if (Reg->getReg() == AMDGPU::ALU_CONST) { + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]); + Consts.push_back(Cst->getZExtValue()); + } + } } + + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset); + Consts.push_back(Cst->getZExtValue()); + if (!TII->fitsConstReadLimitations(Consts)) + break; + + Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32); + Ops[SelIdx[i] - 1] = CstOffset; + return true; } - break; case ISD::FNEG: if (NegIdx[i] < 0) break; diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 63c59e1cb5..8efba5846b 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen AMDGPUFrameLowering.cpp AMDGPUIndirectAddressing.cpp AMDGPUMCInstLower.cpp + AMDGPUMachineFunction.cpp AMDGPUSubtarget.cpp AMDGPUStructurizeCFG.cpp AMDGPUTargetMachine.cpp @@ -34,6 +35,8 @@ add_llvm_target(R600CodeGen AMDGPUConvertToISA.cpp AMDGPUInstrInfo.cpp AMDGPURegisterInfo.cpp + R600ControlFlowFinalizer.cpp + R600EmitClauseMarkers.cpp R600ExpandSpecialInstrs.cpp R600InstrInfo.cpp R600ISelLowering.cpp diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 4d3d3e7945..b7cdd7c8cd 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -68,8 +68,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo() { //===--- Dwarf Emission Directives -----------------------------------===// HasLEB128 = true; SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::None; - DwarfUsesInlineInfoSection = false; DwarfSectionOffsetDirective = ".offset"; } diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index d20716000d..6ef4d40934 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -66,8 +66,6 @@ private: void EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, unsigned SelOpIdx, raw_ostream &OS) const; void EmitDst(const MCInst &MI, raw_ostream &OS) const; - void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const; void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const; void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const; @@ -103,7 +101,8 @@ enum InstrTypes { INSTR_FC, INSTR_NATIVE, INSTR_VTX, - INSTR_EXPORT + INSTR_EXPORT, + INSTR_CFALU }; enum FCInstr { @@ -140,9 +139,7 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII, void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { - if (isTexOp(MI.getOpcode())) { - EmitTexInstr(MI, Fixups, OS); - } else if (isFCOp(MI.getOpcode())){ + if (isFCOp(MI.getOpcode())){ EmitFCInstr(MI, OS); } else if (MI.getOpcode() == AMDGPU::RETURN || MI.getOpcode() == AMDGPU::BUNDLE || @@ -150,6 +147,10 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, return; } else { switch(MI.getOpcode()) { + case AMDGPU::STACK_SIZE: { + EmitByte(MI.getOperand(0).getImm(), OS); + break; + } case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { uint64_t inst = getBinaryCodeForInstr(MI, Fixups); @@ -175,6 +176,77 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(InstWord2, OS); break; } + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: { + unsigned Opcode = MI.getOpcode(); + bool HasOffsets = (Opcode == AMDGPU::TEX_LD); + unsigned OpOffset = HasOffsets ? 3 : 0; + int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); + int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); + + uint32_t SrcSelect[4] = {0, 1, 2, 3}; + uint32_t Offsets[3] = {0, 0, 0}; + uint64_t CoordType[4] = {1, 1, 1, 1}; + + if (HasOffsets) + for (unsigned i = 0; i < 3; i++) + Offsets[i] = MI.getOperand(i + 2).getImm(); + + if (TextureType == TEXTURE_RECT || + TextureType == TEXTURE_SHADOWRECT) { + CoordType[ELEMENT_X] = 0; + CoordType[ELEMENT_Y] = 0; + } + + if (TextureType == TEXTURE_1D_ARRAY || + TextureType == TEXTURE_SHADOW1D_ARRAY) { + if (Opcode == AMDGPU::TEX_SAMPLE_C_L || + Opcode == AMDGPU::TEX_SAMPLE_C_LB) { + CoordType[ELEMENT_Y] = 0; + } else { + CoordType[ELEMENT_Z] = 0; + SrcSelect[ELEMENT_Z] = ELEMENT_Y; + } + } else if (TextureType == TEXTURE_2D_ARRAY || + TextureType == TEXTURE_SHADOW2D_ARRAY) { + CoordType[ELEMENT_Z] = 0; + } + + + if ((TextureType == TEXTURE_SHADOW1D || + TextureType == TEXTURE_SHADOW2D || + TextureType == TEXTURE_SHADOWRECT || + TextureType == TEXTURE_SHADOW1D_ARRAY) && + Opcode != AMDGPU::TEX_SAMPLE_C_L && + Opcode != AMDGPU::TEX_SAMPLE_C_LB) { + SrcSelect[ELEMENT_W] = ELEMENT_Z; + } + + uint64_t Word01 = getBinaryCodeForInstr(MI, Fixups) | + CoordType[ELEMENT_X] << 60 | CoordType[ELEMENT_Y] << 61 | + CoordType[ELEMENT_Z] << 62 | CoordType[ELEMENT_W] << 63; + uint32_t Word2 = Sampler << 15 | SrcSelect[ELEMENT_X] << 20 | + SrcSelect[ELEMENT_Y] << 23 | SrcSelect[ELEMENT_Z] << 26 | + SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 | + Offsets[2] << 10; + + EmitByte(INSTR_TEX, OS); + Emit(Word01, OS); + Emit(Word2, OS); + break; + } case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportSwz: case AMDGPU::EG_ExportBuf: @@ -184,7 +256,29 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, Emit(Inst, OS); break; } - + case AMDGPU::CF_ALU: + case AMDGPU::CF_ALU_PUSH_BEFORE: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_CFALU, OS); + Emit(Inst, OS); + break; + } + case AMDGPU::CF_TC: + case AMDGPU::CF_VC: + case AMDGPU::CF_CALL_FS: + return; + case AMDGPU::WHILE_LOOP: + case AMDGPU::END_LOOP: + case AMDGPU::LOOP_BREAK: + case AMDGPU::CF_CONTINUE: + case AMDGPU::CF_JUMP: + case AMDGPU::CF_ELSE: + case AMDGPU::POP: { + uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); + EmitByte(INSTR_NATIVE, OS); + Emit(Inst, OS); + break; + } default: EmitALUInstr(MI, Fixups, OS); break; @@ -334,99 +428,6 @@ void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned RegOpIdx, Emit(InlineConstant.i, OS); } -void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI, - SmallVectorImpl<MCFixup> &Fixups, - raw_ostream &OS) const { - - unsigned Opcode = MI.getOpcode(); - bool hasOffsets = (Opcode == AMDGPU::TEX_LD); - unsigned OpOffset = hasOffsets ? 3 : 0; - int64_t Resource = MI.getOperand(OpOffset + 2).getImm(); - int64_t Sampler = MI.getOperand(OpOffset + 3).getImm(); - int64_t TextureType = MI.getOperand(OpOffset + 4).getImm(); - unsigned srcSelect[4] = {0, 1, 2, 3}; - - // Emit instruction type - EmitByte(1, OS); - - // Emit instruction - EmitByte(getBinaryCodeForInstr(MI, Fixups), OS); - - // Emit resource id - EmitByte(Resource, OS); - - // Emit source register - EmitByte(getHWReg(MI.getOperand(1).getReg()), OS); - - // XXX: Emit src isRelativeAddress - EmitByte(0, OS); - - // Emit destination register - EmitByte(getHWReg(MI.getOperand(0).getReg()), OS); - - // XXX: Emit dst isRealtiveAddress - EmitByte(0, OS); - - // XXX: Emit dst select - EmitByte(0, OS); // X - EmitByte(1, OS); // Y - EmitByte(2, OS); // Z - EmitByte(3, OS); // W - - // XXX: Emit lod bias - EmitByte(0, OS); - - // XXX: Emit coord types - unsigned coordType[4] = {1, 1, 1, 1}; - - if (TextureType == TEXTURE_RECT - || TextureType == TEXTURE_SHADOWRECT) { - coordType[ELEMENT_X] = 0; - coordType[ELEMENT_Y] = 0; - } - - if (TextureType == TEXTURE_1D_ARRAY - || TextureType == TEXTURE_SHADOW1D_ARRAY) { - if (Opcode == AMDGPU::TEX_SAMPLE_C_L || Opcode == AMDGPU::TEX_SAMPLE_C_LB) { - coordType[ELEMENT_Y] = 0; - } else { - coordType[ELEMENT_Z] = 0; - srcSelect[ELEMENT_Z] = ELEMENT_Y; - } - } else if (TextureType == TEXTURE_2D_ARRAY - || TextureType == TEXTURE_SHADOW2D_ARRAY) { - coordType[ELEMENT_Z] = 0; - } - - for (unsigned i = 0; i < 4; i++) { - EmitByte(coordType[i], OS); - } - - // XXX: Emit offsets - if (hasOffsets) - for (unsigned i = 2; i < 5; i++) - EmitByte(MI.getOperand(i).getImm()<<1, OS); - else - EmitNullBytes(3, OS); - - // Emit sampler id - EmitByte(Sampler, OS); - - // XXX:Emit source select - if ((TextureType == TEXTURE_SHADOW1D - || TextureType == TEXTURE_SHADOW2D - || TextureType == TEXTURE_SHADOWRECT - || TextureType == TEXTURE_SHADOW1D_ARRAY) - && Opcode != AMDGPU::TEX_SAMPLE_C_L - && Opcode != AMDGPU::TEX_SAMPLE_C_LB) { - srcSelect[ELEMENT_W] = ELEMENT_Z; - } - - for (unsigned i = 0; i < 4; i++) { - EmitByte(srcSelect[i], OS); - } -} - void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const { // Emit instruction type diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index e27abccbe1..5af83209a0 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -39,8 +39,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const SIMCCodeEmitter &) LLVM_DELETED_FUNCTION; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - const MCSubtargetInfo &STI; - MCContext &Ctx; /// \brief Can this operand also contain immediate values? bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; @@ -51,7 +49,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, const MCSubtargetInfo &sti, MCContext &ctx) - : MCII(mcii), MRI(mri), STI(sti), Ctx(ctx) { } + : MCII(mcii), MRI(mri) { } ~SIMCCodeEmitter() { } diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp new file mode 100644 index 0000000000..bd87d741ec --- /dev/null +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -0,0 +1,264 @@ +//===-- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass compute turns all control flow pseudo instructions into native one +/// computing their address on the fly ; it also sets STACK_SIZE info. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +class R600ControlFlowFinalizer : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + unsigned MaxFetchInst; + + bool isFetch(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::TEX_VTX_CONSTBUF: + case AMDGPU::TEX_VTX_TEXBUF: + case AMDGPU::TEX_LD: + case AMDGPU::TEX_GET_TEXTURE_RESINFO: + case AMDGPU::TEX_GET_GRADIENTS_H: + case AMDGPU::TEX_GET_GRADIENTS_V: + case AMDGPU::TEX_SET_GRADIENTS_H: + case AMDGPU::TEX_SET_GRADIENTS_V: + case AMDGPU::TEX_SAMPLE: + case AMDGPU::TEX_SAMPLE_C: + case AMDGPU::TEX_SAMPLE_L: + case AMDGPU::TEX_SAMPLE_C_L: + case AMDGPU::TEX_SAMPLE_LB: + case AMDGPU::TEX_SAMPLE_C_LB: + case AMDGPU::TEX_SAMPLE_G: + case AMDGPU::TEX_SAMPLE_C_G: + case AMDGPU::TXD: + case AMDGPU::TXD_SHADOW: + return true; + default: + return false; + } + } + + bool IsTrivialInst(MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::KILL: + case AMDGPU::RETURN: + return true; + default: + return false; + } + } + + MachineBasicBlock::iterator + MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned CfAddress) const { + MachineBasicBlock::iterator ClauseHead = I; + unsigned AluInstCount = 0; + for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { + if (IsTrivialInst(I)) + continue; + if (!isFetch(I)) + break; + AluInstCount ++; + if (AluInstCount > MaxFetchInst) + break; + } + BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), + TII->get(AMDGPU::CF_TC)) + .addImm(CfAddress) // ADDR + .addImm(AluInstCount); // COUNT + return I; + } + void CounterPropagateAddr(MachineInstr *MI, unsigned Addr) const { + switch (MI->getOpcode()) { + case AMDGPU::WHILE_LOOP: + MI->getOperand(0).setImm(Addr + 1); + break; + default: + MI->getOperand(0).setImm(Addr); + break; + } + } + void CounterPropagateAddr(std::set<MachineInstr *> MIs, unsigned Addr) + const { + for (std::set<MachineInstr *>::iterator It = MIs.begin(), E = MIs.end(); + It != E; ++It) { + MachineInstr *MI = *It; + CounterPropagateAddr(MI, Addr); + } + } + +public: + R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { + const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>(); + if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) + MaxFetchInst = 8; + else + MaxFetchInst = 16; + } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + unsigned MaxStack = 0; + unsigned CurrentStack = 0; + for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; + ++MB) { + MachineBasicBlock &MBB = *MB; + unsigned CfCount = 0; + std::vector<std::pair<unsigned, std::set<MachineInstr *> > > LoopStack; + std::vector<std::pair<unsigned, MachineInstr *> > IfThenElseStack; + R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); + if (MFI->ShaderType == 1) { + BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), + TII->get(AMDGPU::CF_CALL_FS)); + CfCount++; + } + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E;) { + if (isFetch(I)) { + I = MakeFetchClause(MBB, I, 0); + CfCount++; + continue; + } + + MachineBasicBlock::iterator MI = I; + I++; + switch (MI->getOpcode()) { + case AMDGPU::CF_ALU_PUSH_BEFORE: + CurrentStack++; + MaxStack = std::max(MaxStack, CurrentStack); + case AMDGPU::KILLGT: + case AMDGPU::CF_ALU: + CfCount++; + break; + case AMDGPU::WHILELOOP: { + CurrentStack++; + MaxStack = std::max(MaxStack, CurrentStack); + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::WHILE_LOOP)) + .addImm(0); + std::pair<unsigned, std::set<MachineInstr *> > Pair(CfCount, + std::set<MachineInstr *>()); + Pair.second.insert(MIb); + LoopStack.push_back(Pair); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::ENDLOOP: { + CurrentStack--; + std::pair<unsigned, std::set<MachineInstr *> > Pair = + LoopStack.back(); + LoopStack.pop_back(); + CounterPropagateAddr(Pair.second, CfCount); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::END_LOOP)) + .addImm(Pair.first + 1); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::IF_PREDICATE_SET: { + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::CF_JUMP)) + .addImm(0) + .addImm(0); + std::pair<unsigned, MachineInstr *> Pair(CfCount, MIb); + IfThenElseStack.push_back(Pair); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::ELSE: { + std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back(); + IfThenElseStack.pop_back(); + CounterPropagateAddr(Pair.second, CfCount); + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::CF_ELSE)) + .addImm(0) + .addImm(1); + std::pair<unsigned, MachineInstr *> NewPair(CfCount, MIb); + IfThenElseStack.push_back(NewPair); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::ENDIF: { + CurrentStack--; + std::pair<unsigned, MachineInstr *> Pair = IfThenElseStack.back(); + IfThenElseStack.pop_back(); + CounterPropagateAddr(Pair.second, CfCount + 1); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + .addImm(CfCount + 1) + .addImm(1); + MI->eraseFromParent(); + CfCount++; + break; + } + case AMDGPU::PREDICATED_BREAK: { + CurrentStack--; + CfCount += 3; + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_JUMP)) + .addImm(CfCount) + .addImm(1); + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::LOOP_BREAK)) + .addImm(0); + BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::POP)) + .addImm(CfCount) + .addImm(1); + LoopStack.back().second.insert(MIb); + MI->eraseFromParent(); + break; + } + case AMDGPU::CONTINUE: { + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), + TII->get(AMDGPU::CF_CONTINUE)) + .addImm(CfCount); + LoopStack.back().second.insert(MIb); + MI->eraseFromParent(); + CfCount++; + break; + } + default: + break; + } + } + BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), + TII->get(AMDGPU::STACK_SIZE)) + .addImm(MaxStack); + } + + return false; + } + + const char *getPassName() const { + return "R600 Control Flow Finalizer Pass"; + } +}; + +char R600ControlFlowFinalizer::ID = 0; + +} + + +llvm::FunctionPass *llvm::createR600ControlFlowFinalizer(TargetMachine &TM) { + return new R600ControlFlowFinalizer(TM); +} + diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp new file mode 100644 index 0000000000..7c7469a04b --- /dev/null +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -0,0 +1,253 @@ +//===-- R600EmitClauseMarkers.cpp - Emit CF_ALU ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Add CF_ALU. R600 Alu instructions are grouped in clause which can hold +/// 128 Alu instructions ; these instructions can access up to 4 prefetched +/// 4 lines of 16 registers from constant buffers. Such ALU clauses are +/// initiated by CF_ALU instructions. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "R600Defines.h" +#include "R600InstrInfo.h" +#include "R600MachineFunctionInfo.h" +#include "R600RegisterInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +namespace llvm { + +class R600EmitClauseMarkersPass : public MachineFunctionPass { + +private: + static char ID; + const R600InstrInfo *TII; + + unsigned OccupiedDwords(MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::INTERP_PAIR_ZW: + case AMDGPU::INTERP_VEC_LOAD: + case AMDGPU::DOT4_eg_pseudo: + case AMDGPU::DOT4_r600_pseudo: + return 4; + case AMDGPU::KILL: + return 0; + default: + break; + } + + if(TII->isVector(*MI) || + TII->isCubeOp(MI->getOpcode()) || + TII->isReductionOp(MI->getOpcode())) + return 4; + + unsigned NumLiteral = 0; + for (MachineInstr::mop_iterator It = MI->operands_begin(), + E = MI->operands_end(); It != E; ++It) { + MachineOperand &MO = *It; + if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X) + ++NumLiteral; + } + return 1 + NumLiteral; + } + + bool isALU(const MachineInstr *MI) const { + if (MI->getOpcode() == AMDGPU::KILLGT) + return false; + if (TII->isALUInstr(MI->getOpcode())) + return true; + if (TII->isVector(*MI) || TII->isCubeOp(MI->getOpcode())) + return true; + switch (MI->getOpcode()) { + case AMDGPU::PRED_X: + case AMDGPU::INTERP_PAIR_XY: + case AMDGPU::INTERP_PAIR_ZW: + case AMDGPU::INTERP_VEC_LOAD: + case AMDGPU::COPY: + case AMDGPU::DOT4_eg_pseudo: + case AMDGPU::DOT4_r600_pseudo: + return true; + default: + return false; + } + } + + bool IsTrivialInst(MachineInstr *MI) const { + switch (MI->getOpcode()) { + case AMDGPU::KILL: + case AMDGPU::RETURN: + return true; + default: + return false; + } + } + + // Register Idx, then Const value + std::vector<std::pair<unsigned, unsigned> > ExtractConstRead(MachineInstr *MI) + const { + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + std::vector<std::pair<unsigned, unsigned> > Result; + + if (!TII->isALUInstr(MI->getOpcode())) + return Result; + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = TII->getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + TII->getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Result.push_back(std::pair<unsigned, unsigned>(SrcIdx, Const)); + } + } + return Result; + } + + std::pair<unsigned, unsigned> getAccessedBankLine(unsigned Sel) const { + // Sel is (512 + (kc_bank << 12) + ConstIndex) << 2 + // (See also R600ISelLowering.cpp) + // ConstIndex value is in [0, 4095]; + return std::pair<unsigned, unsigned>( + ((Sel >> 2) - 512) >> 12, // KC_BANK + // Line Number of ConstIndex + // A line contains 16 constant registers however KCX bank can lock + // two line at the same time ; thus we want to get an even line number. + // Line number can be retrieved with (>>4), using (>>5) <<1 generates + // an even number. + ((((Sel >> 2) - 512) & 4095) >> 5) << 1); + } + + bool SubstituteKCacheBank(MachineInstr *MI, + std::vector<std::pair<unsigned, unsigned> > &CachedConsts) const { + std::vector<std::pair<unsigned, unsigned> > UsedKCache; + std::vector<std::pair<unsigned, unsigned> > Consts = ExtractConstRead(MI); + assert(TII->isALUInstr(MI->getOpcode()) && "Can't assign Const"); + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned Sel = Consts[i].second; + unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31; + unsigned KCacheIndex = Index * 4 + Chan; + const std::pair<unsigned, unsigned> &BankLine = getAccessedBankLine(Sel); + if (CachedConsts.empty()) { + CachedConsts.push_back(BankLine); + UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); + continue; + } + if (CachedConsts[0] == BankLine) { + UsedKCache.push_back(std::pair<unsigned, unsigned>(0, KCacheIndex)); + continue; + } + if (CachedConsts.size() == 1) { + CachedConsts.push_back(BankLine); + UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); + continue; + } + if (CachedConsts[1] == BankLine) { + UsedKCache.push_back(std::pair<unsigned, unsigned>(1, KCacheIndex)); + continue; + } + return false; + } + + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + switch(UsedKCache[i].first) { + case 0: + MI->getOperand(Consts[i].first).setReg( + AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[i].second)); + break; + case 1: + MI->getOperand(Consts[i].first).setReg( + AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[i].second)); + break; + default: + llvm_unreachable("Wrong Cache Line"); + } + } + return true; + } + + MachineBasicBlock::iterator + MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { + MachineBasicBlock::iterator ClauseHead = I; + std::vector<std::pair<unsigned, unsigned> > KCacheBanks; + bool PushBeforeModifier = false; + unsigned AluInstCount = 0; + for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { + if (IsTrivialInst(I)) + continue; + if (!isALU(I)) + break; + if (I->getOpcode() == AMDGPU::PRED_X) { + if (TII->getFlagOp(I).getImm() & MO_FLAG_PUSH) + PushBeforeModifier = true; + AluInstCount ++; + continue; + } + if (TII->isALUInstr(I->getOpcode()) && + !SubstituteKCacheBank(I, KCacheBanks)) + break; + AluInstCount += OccupiedDwords(I); + if (AluInstCount > 124) + break; + } + unsigned Opcode = PushBeforeModifier ? + AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU; + BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode)) + .addImm(0) // ADDR + .addImm(KCacheBanks.empty()?0:KCacheBanks[0].first) // KB0 + .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].first) // KB1 + .addImm(KCacheBanks.empty()?0:2) // KM0 + .addImm((KCacheBanks.size() < 2)?0:2) // KM1 + .addImm(KCacheBanks.empty()?0:KCacheBanks[0].second) // KLINE0 + .addImm((KCacheBanks.size() < 2)?0:KCacheBanks[1].second) // KLINE1 + .addImm(AluInstCount); // COUNT + return I; + } + +public: + R600EmitClauseMarkersPass(TargetMachine &tm) : MachineFunctionPass(ID), + TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); + BB != BB_E; ++BB) { + MachineBasicBlock &MBB = *BB; + MachineBasicBlock::iterator I = MBB.begin(); + if (I->getOpcode() == AMDGPU::CF_ALU) + continue; // BB was already parsed + for (MachineBasicBlock::iterator E = MBB.end(); I != E;) { + if (isALU(I)) + I = MakeALUClause(MBB, I); + else + ++I; + } + } + return false; + } + + const char *getPassName() const { + return "R600 Emit Clause Markers Pass"; + } +}; + +char R600EmitClauseMarkersPass::ID = 0; + +} + + +llvm::FunctionPass *llvm::createR600EmitClauseMarkers(TargetMachine &TM) { + return new R600EmitClauseMarkersPass(TM); +} + diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index a73691dd3c..53e6e51dd2 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -28,7 +28,6 @@ using namespace llvm; R600TargetLowering::R600TargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM), TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo())) { - setOperationAction(ISD::MUL, MVT::i64, Expand); addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass); addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass); addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass); @@ -58,7 +57,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom); - setOperationAction(ISD::FPOW, MVT::f32, Custom); setOperationAction(ISD::ROTL, MVT::i32, Custom); @@ -316,7 +314,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); - case ISD::FPOW: return LowerFPOW(Op, DAG); case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -918,15 +915,6 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, 2, DL); } -SDValue R600TargetLowering::LowerFPOW(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - EVT VT = Op.getValueType(); - SDValue LogBase = DAG.getNode(ISD::FLOG2, DL, VT, Op.getOperand(0)); - SDValue MulLogBase = DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), LogBase); - return DAG.getNode(ISD::FEXP2, DL, VT, MulLogBase); -} - /// XXX Only kernel functions are supported, so we can assume for now that /// every function is a kernel function, but in the future we should use /// separate calling conventions for kernel and non-kernel functions. diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 5cb4b912a1..2c09acb9af 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -59,7 +59,6 @@ private: SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFPOW(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index be3318a0b4..08650980fd 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const { (TargetFlags & R600_InstFlag::OP3)); } +bool +R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts) + const { + assert (Consts.size() <= 12 && "Too many operands in instructions group"); + unsigned Pair1 = 0, Pair2 = 0; + for (unsigned i = 0, n = Consts.size(); i < n; ++i) { + unsigned ReadConstHalf = Consts[i] & 2; + unsigned ReadConstIndex = Consts[i] & (~3); + unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf; + if (!Pair1) { + Pair1 = ReadHalfConst; + continue; + } + if (Pair1 == ReadHalfConst) + continue; + if (!Pair2) { + Pair2 = ReadHalfConst; + continue; + } + if (Pair2 != ReadHalfConst) + return false; + } + return true; +} + +bool +R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const { + std::vector<unsigned> Consts; + for (unsigned i = 0, n = MIs.size(); i < n; i++) { + const MachineInstr *MI = MIs[i]; + + const R600Operands::Ops OpTable[3][2] = { + {R600Operands::SRC0, R600Operands::SRC0_SEL}, + {R600Operands::SRC1, R600Operands::SRC1_SEL}, + {R600Operands::SRC2, R600Operands::SRC2_SEL}, + }; + + if (!isALUInstr(MI->getOpcode())) + continue; + + for (unsigned j = 0; j < 3; j++) { + int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]); + if (SrcIdx < 0) + break; + if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) { + unsigned Const = MI->getOperand( + getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm(); + Consts.push_back(Const); + } + } + } + return fitsConstReadLimitations(Consts); +} + DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM, const ScheduleDAG *DAG) const { const InstrItineraryData *II = TM->getInstrItineraryData(); diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index efe721c00c..bf9569e659 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -53,6 +53,9 @@ namespace llvm { /// \returns true if this \p Opcode represents an ALU instruction. bool isALUInstr(unsigned Opcode) const; + bool fitsConstReadLimitations(const std::vector<unsigned>&) const; + bool canBundle(const std::vector<MachineInstr *> &) const; + /// \breif Vector instructions are instructions that must fill all /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index c5fa3347dc..663b41a66d 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -234,6 +234,80 @@ class VTX_WORD1_GPR { let Word1{31} = SRF_MODE_ALL; } +class TEX_WORD0 { + field bits<32> Word0; + + bits<5> TEX_INST; + bits<2> INST_MOD; + bits<1> FETCH_WHOLE_QUAD; + bits<8> RESOURCE_ID; + bits<7> SRC_GPR; + bits<1> SRC_REL; + bits<1> ALT_CONST; + bits<2> RESOURCE_INDEX_MODE; + bits<2> SAMPLER_INDEX_MODE; + + let Word0{4-0} = TEX_INST; + let Word0{6-5} = INST_MOD; + let Word0{7} = FETCH_WHOLE_QUAD; + let Word0{15-8} = RESOURCE_ID; + let Word0{22-16} = SRC_GPR; + let Word0{23} = SRC_REL; + let Word0{24} = ALT_CONST; + let Word0{26-25} = RESOURCE_INDEX_MODE; + let Word0{28-27} = SAMPLER_INDEX_MODE; +} + +class TEX_WORD1 { + field bits<32> Word1; + + bits<7> DST_GPR; + bits<1> DST_REL; + bits<3> DST_SEL_X; + bits<3> DST_SEL_Y; + bits<3> DST_SEL_Z; + bits<3> DST_SEL_W; + bits<7> LOD_BIAS; + bits<1> COORD_TYPE_X; + bits<1> COORD_TYPE_Y; + bits<1> COORD_TYPE_Z; + bits<1> COORD_TYPE_W; + + let Word1{6-0} = DST_GPR; + let Word1{7} = DST_REL; + let Word1{11-9} = DST_SEL_X; + let Word1{14-12} = DST_SEL_Y; + let Word1{17-15} = DST_SEL_Z; + let Word1{20-18} = DST_SEL_W; + let Word1{27-21} = LOD_BIAS; + let Word1{28} = COORD_TYPE_X; + let Word1{29} = COORD_TYPE_Y; + let Word1{30} = COORD_TYPE_Z; + let Word1{31} = COORD_TYPE_W; +} + +class TEX_WORD2 { + field bits<32> Word2; + + bits<5> OFFSET_X; + bits<5> OFFSET_Y; + bits<5> OFFSET_Z; + bits<5> SAMPLER_ID; + bits<3> SRC_SEL_X; + bits<3> SRC_SEL_Y; + bits<3> SRC_SEL_Z; + bits<3> SRC_SEL_W; + + let Word2{4-0} = OFFSET_X; + let Word2{9-5} = OFFSET_Y; + let Word2{14-10} = OFFSET_Z; + let Word2{19-15} = SAMPLER_ID; + let Word2{22-20} = SRC_SEL_X; + let Word2{25-23} = SRC_SEL_Y; + let Word2{28-26} = SRC_SEL_Z; + let Word2{31-29} = SRC_SEL_W; +} + /* XXX: R600 subtarget uses a slightly different encoding than the other subtargets. We currently handle this in R600MCCodeEmitter, but we may @@ -277,9 +351,9 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, (ins WRITE:$write, OMOD:$omod, REL:$dst_rel, CLAMP:$clamp, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(opName, + !strconcat(" ", opName, "$clamp $dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " + "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -318,10 +392,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, R600_Reg32:$src0, NEG:$src0_neg, REL:$src0_rel, ABS:$src0_abs, SEL:$src0_sel, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, ABS:$src1_abs, SEL:$src1_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(opName, + !strconcat(" ", opName, "$clamp $update_exec_mask$update_pred$dst$write$dst_rel$omod, " - "$src0_neg$src0_abs$src0$src0_sel$src0_abs$src0_rel, " - "$src1_neg$src1_abs$src1$src1_sel$src1_abs$src1_rel, " + "$src0_neg$src0_abs$src0$src0_abs$src0_rel, " + "$src1_neg$src1_abs$src1$src1_abs$src1_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -356,10 +430,10 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern, R600_Reg32:$src1, NEG:$src1_neg, REL:$src1_rel, SEL:$src1_sel, R600_Reg32:$src2, NEG:$src2_neg, REL:$src2_rel, SEL:$src2_sel, LAST:$last, R600_Pred:$pred_sel, LITERAL:$literal), - !strconcat(opName, "$clamp $dst$dst_rel, " - "$src0_neg$src0$src0_sel$src0_rel, " - "$src1_neg$src1$src1_sel$src1_rel, " - "$src2_neg$src2$src2_sel$src2_rel, " + !strconcat(" ", opName, "$clamp $dst$dst_rel, " + "$src0_neg$src0$src0_rel, " + "$src1_neg$src1$src1_rel, " + "$src2_neg$src2$src2_rel, " "$literal $pred_sel$last"), pattern, itin>, @@ -386,12 +460,32 @@ class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern, class R600_TEX <bits<11> inst, string opName, list<dag> pattern, InstrItinClass itin = AnyALU> : InstR600 <inst, - (outs R600_Reg128:$dst), - (ins R600_Reg128:$src0, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget), - !strconcat(opName, "$dst, $src0, $resourceId, $samplerId, $textureTarget"), + (outs R600_Reg128:$DST_GPR), + (ins R600_Reg128:$SRC_GPR, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, i32imm:$textureTarget), + !strconcat(opName, "$DST_GPR, $SRC_GPR, $RESOURCE_ID, $SAMPLER_ID, $textureTarget"), pattern, - itin>{ - let Inst {10-0} = inst; + itin>, TEX_WORD0, TEX_WORD1, TEX_WORD2 { + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; + + let TEX_INST = inst{4-0}; + let SRC_REL = 0; + let DST_REL = 0; + let DST_SEL_X = 0; + let DST_SEL_Y = 1; + let DST_SEL_Z = 2; + let DST_SEL_W = 3; + let LOD_BIAS = 0; + + let INST_MOD = 0; + let FETCH_WHOLE_QUAD = 0; + let ALT_CONST = 0; + let SAMPLER_INDEX_MODE = 0; + + let COORD_TYPE_X = 0; + let COORD_TYPE_Y = 0; + let COORD_TYPE_Z = 0; + let COORD_TYPE_W = 0; } } // End mayLoad = 1, mayStore = 0, hasSideEffects = 0 @@ -671,6 +765,167 @@ class ExportBufInst : InstR600ISA<( let Inst{63-32} = Word1; } +//===----------------------------------------------------------------------===// +// Control Flow Instructions +//===----------------------------------------------------------------------===// + +class CF_ALU_WORD0 { + field bits<32> Word0; + + bits<22> ADDR; + bits<4> KCACHE_BANK0; + bits<4> KCACHE_BANK1; + bits<2> KCACHE_MODE0; + + let Word0{21-0} = ADDR; + let Word0{25-22} = KCACHE_BANK0; + let Word0{29-26} = KCACHE_BANK1; + let Word0{31-30} = KCACHE_MODE0; +} + +class CF_ALU_WORD1 { + field bits<32> Word1; + + bits<2> KCACHE_MODE1; + bits<8> KCACHE_ADDR0; + bits<8> KCACHE_ADDR1; + bits<7> COUNT; + bits<1> ALT_CONST; + bits<4> CF_INST; + bits<1> WHOLE_QUAD_MODE; + bits<1> BARRIER; + + let Word1{1-0} = KCACHE_MODE1; + let Word1{9-2} = KCACHE_ADDR0; + let Word1{17-10} = KCACHE_ADDR1; + let Word1{24-18} = COUNT; + let Word1{25} = ALT_CONST; + let Word1{29-26} = CF_INST; + let Word1{30} = WHOLE_QUAD_MODE; + let Word1{31} = BARRIER; +} + +class ALU_CLAUSE<bits<4> inst, string OpName> : AMDGPUInst <(outs), +(ins i32imm:$ADDR, i32imm:$KCACHE_BANK0, i32imm:$KCACHE_BANK1, i32imm:$KCACHE_MODE0, i32imm:$KCACHE_MODE1, +i32imm:$KCACHE_ADDR0, i32imm:$KCACHE_ADDR1, i32imm:$COUNT), +!strconcat(OpName, " $COUNT, @$ADDR, " +"KC0[CB$KCACHE_BANK0:$KCACHE_ADDR0-$KCACHE_ADDR0+32]" +", KC1[CB$KCACHE_BANK1:$KCACHE_ADDR1-$KCACHE_ADDR1+32]"), +[] >, CF_ALU_WORD0, CF_ALU_WORD1 { + field bits<64> Inst; + + let CF_INST = inst; + let ALT_CONST = 0; + let WHOLE_QUAD_MODE = 0; + let BARRIER = 1; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +class CF_WORD0 { + field bits<32> Word0; + + bits<24> ADDR; + bits<3> JUMPTABLE_SEL; + + let Word0{23-0} = ADDR; + let Word0{26-24} = JUMPTABLE_SEL; +} + +class CF_WORD1 { + field bits<32> Word1; + + bits<3> POP_COUNT; + bits<5> CF_CONST; + bits<2> COND; + bits<6> COUNT; + bits<1> VALID_PIXEL_MODE; + bits<8> CF_INST; + bits<1> BARRIER; + + let Word1{2-0} = POP_COUNT; + let Word1{7-3} = CF_CONST; + let Word1{9-8} = COND; + let Word1{15-10} = COUNT; + let Word1{20} = VALID_PIXEL_MODE; + let Word1{29-22} = CF_INST; + let Word1{31} = BARRIER; +} + +class CF_CLAUSE <bits<8> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs), +ins, AsmPrint, [] >, CF_WORD0, CF_WORD1 { + field bits<64> Inst; + + let CF_INST = inst; + let BARRIER = 1; + let JUMPTABLE_SEL = 0; + let CF_CONST = 0; + let VALID_PIXEL_MODE = 0; + let COND = 0; + + let Inst{31-0} = Word0; + let Inst{63-32} = Word1; +} + +def CF_TC : CF_CLAUSE<1, (ins i32imm:$ADDR, i32imm:$COUNT), +"TEX $COUNT @$ADDR"> { + let POP_COUNT = 0; +} + +def CF_VC : CF_CLAUSE<2, (ins i32imm:$ADDR, i32imm:$COUNT), +"VTX $COUNT @$ADDR"> { + let POP_COUNT = 0; +} + +def WHILE_LOOP : CF_CLAUSE<6, (ins i32imm:$ADDR), "LOOP_START_DX10 @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def END_LOOP : CF_CLAUSE<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def LOOP_BREAK : CF_CLAUSE<9, (ins i32imm:$ADDR), "LOOP_BREAK @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def CF_CONTINUE : CF_CLAUSE<8, (ins i32imm:$ADDR), "CONTINUE @$ADDR"> { + let POP_COUNT = 0; + let COUNT = 0; +} + +def CF_JUMP : CF_CLAUSE<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "JUMP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; +} + +def CF_ELSE : CF_CLAUSE<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "ELSE @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; +} + +def CF_CALL_FS : CF_CLAUSE<19, (ins), "CALL_FS"> { + let ADDR = 0; + let COUNT = 0; + let POP_COUNT = 0; +} + +def POP : CF_CLAUSE<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "POP @$ADDR POP:$POP_COUNT"> { + let COUNT = 0; +} + +def CF_ALU : ALU_CLAUSE<8, "ALU">; +def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">; + +def STACK_SIZE : AMDGPUInst <(outs), +(ins i32imm:$num), "nstack $num", [] > { + field bits<8> Inst; + bits<8> num; + let Inst = num; +} + let Predicates = [isR600toCayman] in { //===----------------------------------------------------------------------===// @@ -867,25 +1122,33 @@ def CNDGT_INT : R600_3OP < def TEX_LD : R600_TEX < 0x03, "TEX_LD", - [(set R600_Reg128:$dst, (int_AMDGPU_txf R600_Reg128:$src0, imm:$src1, imm:$src2, imm:$src3, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txf R600_Reg128:$SRC_GPR, + imm:$OFFSET_X, imm:$OFFSET_Y, imm:$OFFSET_Z, imm:$RESOURCE_ID, + imm:$SAMPLER_ID, imm:$textureTarget))] > { -let AsmString = "TEX_LD $dst, $src0, $src1, $src2, $src3, $resourceId, $samplerId, $textureTarget"; -let InOperandList = (ins R600_Reg128:$src0, i32imm:$src1, i32imm:$src2, i32imm:$src3, i32imm:$resourceId, i32imm:$samplerId, i32imm:$textureTarget); +let AsmString = "TEX_LD $DST_GPR, $SRC_GPR, $OFFSET_X, $OFFSET_Y, $OFFSET_Z," + "$RESOURCE_ID, $SAMPLER_ID, $textureTarget"; +let InOperandList = (ins R600_Reg128:$SRC_GPR, i32imm:$OFFSET_X, + i32imm:$OFFSET_Y, i32imm:$OFFSET_Z, i32imm:$RESOURCE_ID, i32imm:$SAMPLER_ID, + i32imm:$textureTarget); } def TEX_GET_TEXTURE_RESINFO : R600_TEX < 0x04, "TEX_GET_TEXTURE_RESINFO", - [(set R600_Reg128:$dst, (int_AMDGPU_txq R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txq R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_H : R600_TEX < 0x07, "TEX_GET_GRADIENTS_H", - [(set R600_Reg128:$dst, (int_AMDGPU_ddx R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddx R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_GET_GRADIENTS_V : R600_TEX < 0x08, "TEX_GET_GRADIENTS_V", - [(set R600_Reg128:$dst, (int_AMDGPU_ddy R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_ddy R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SET_GRADIENTS_H : R600_TEX < @@ -900,32 +1163,38 @@ def TEX_SET_GRADIENTS_V : R600_TEX < def TEX_SAMPLE : R600_TEX < 0x10, "TEX_SAMPLE", - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C : R600_TEX < 0x18, "TEX_SAMPLE_C", - [(set R600_Reg128:$dst, (int_AMDGPU_tex R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_tex R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_L : R600_TEX < 0x11, "TEX_SAMPLE_L", - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_L : R600_TEX < 0x19, "TEX_SAMPLE_C_L", - [(set R600_Reg128:$dst, (int_AMDGPU_txl R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txl R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_LB : R600_TEX < 0x12, "TEX_SAMPLE_LB", - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0,imm:$resourceId, imm:$samplerId, imm:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, imm:$textureTarget))] >; def TEX_SAMPLE_C_LB : R600_TEX < 0x1A, "TEX_SAMPLE_C_LB", - [(set R600_Reg128:$dst, (int_AMDGPU_txb R600_Reg128:$src0, imm:$resourceId, imm:$samplerId, TEX_SHADOW:$textureTarget))] + [(set R600_Reg128:$DST_GPR, (int_AMDGPU_txb R600_Reg128:$SRC_GPR, + imm:$RESOURCE_ID, imm:$SAMPLER_ID, TEX_SHADOW:$textureTarget))] >; def TEX_SAMPLE_G : R600_TEX < @@ -1141,6 +1410,7 @@ let Predicates = [isR600] in { def RECIP_UINT_r600 : RECIP_UINT_Common <0x78>; defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>; + def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL, R600_Reg32>; def TGSI_LIT_Z_r600 : TGSI_LIT_Z_Common<MUL_LIT_r600, LOG_CLAMPED_r600, EXP_IEEE_r600>; def : Pat<(fsqrt R600_Reg32:$src), @@ -1212,6 +1482,7 @@ def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>; def SIN_eg : SIN_Common<0x8D>; def COS_eg : COS_Common<0x8E>; +def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL, R600_Reg32>; def : SIN_PAT <SIN_eg>; def : COS_PAT <COS_eg>; def : Pat<(fsqrt R600_Reg32:$src), @@ -1540,13 +1811,14 @@ def MULLO_UINT_cm : MULLO_UINT_Common<0x91>; def MULHI_UINT_cm : MULHI_UINT_Common<0x92>; def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>; def EXP_IEEE_cm : EXP_IEEE_Common<0x81>; -def LOG_IEEE_ : LOG_IEEE_Common<0x83>; +def LOG_IEEE_cm : LOG_IEEE_Common<0x83>; def RECIP_CLAMPED_cm : RECIP_CLAMPED_Common<0x84>; def RECIPSQRT_IEEE_cm : RECIPSQRT_IEEE_Common<0x89>; def SIN_cm : SIN_Common<0x8D>; def COS_cm : COS_Common<0x8E>; } // End isVector = 1 +def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL, R600_Reg32>; def : SIN_PAT <SIN_cm>; def : COS_PAT <COS_cm>; @@ -1979,8 +2251,8 @@ def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 1, sub1>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 2, sub2>; def : Insert_Element <i32, v4i32, R600_Reg32, R600_Reg128, 3, sub3>; -def : Vector_Build <v4f32, R600_Reg128, f32, R600_Reg32>; -def : Vector_Build <v4i32, R600_Reg128, i32, R600_Reg32>; +def : Vector4_Build <v4f32, R600_Reg128, f32, R600_Reg32>; +def : Vector4_Build <v4i32, R600_Reg128, i32, R600_Reg32>; // bitconvert patterns diff --git a/lib/Target/R600/R600MachineFunctionInfo.cpp b/lib/Target/R600/R600MachineFunctionInfo.cpp index 40aec833ea..018b403633 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.cpp +++ b/lib/Target/R600/R600MachineFunctionInfo.cpp @@ -13,6 +13,6 @@ using namespace llvm; R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo() { - memset(Outputs, 0, sizeof(Outputs)); - } + : AMDGPUMachineFunction(MF) { } + + diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index 4b901f4bbc..99c1f91b09 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -14,19 +14,17 @@ #define R600MACHINEFUNCTIONINFO_H #include "llvm/ADT/BitVector.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "AMDGPUMachineFunction.h" #include <vector> namespace llvm { -class R600MachineFunctionInfo : public MachineFunctionInfo { - +class R600MachineFunctionInfo : public AMDGPUMachineFunction { public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector<unsigned, 4> LiveOuts; std::vector<unsigned> IndirectRegs; - SDNode *Outputs[16]; }; } // End llvm namespace diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index 19baef94c7..9074364bb3 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { CurInstKind = IDOther; CurEmitted = 0; OccupedSlotsMask = 15; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); InstKindLimit[IDAlu] = 120; // 120 minus 8 for security @@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { } } -class ConstPairs { -private: - unsigned XYPair; - unsigned ZWPair; -public: - ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) { - for (unsigned i = 0; i < 3; i++) { - unsigned ReadConstChan = ReadConst[i] & 3; - unsigned ReadConstIndex = ReadConst[i] & (~3); - if (ReadConstChan < 2) { - if (!XYPair) { - XYPair = ReadConstIndex; - } - } else { - if (!ZWPair) { - ZWPair = ReadConstIndex; - } - } - } - } - - bool isCompatibleWith(const ConstPairs& CP) const { - return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) && - (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair); - } -}; - -static -const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) { - unsigned ReadConsts[3] = {0, 0, 0}; - R600Operands::Ops OpTable[3][2] = { - {R600Operands::SRC0, R600Operands::SRC0_SEL}, - {R600Operands::SRC1, R600Operands::SRC1_SEL}, - {R600Operands::SRC2, R600Operands::SRC2_SEL}, - }; - - if (!TII->isALUInstr(MI.getOpcode())) - return ConstPairs(ReadConsts); - - for (unsigned i = 0; i < 3; i++) { - int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]); - if (SrcIdx < 0) - break; - if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) - ReadConsts[i] =MI.getOperand( - TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm(); - } - return ConstPairs(ReadConsts); -} - -bool -R600SchedStrategy::isBundleable(const MachineInstr& MI) { - const ConstPairs &MIPair = getPairs(TII, MI); - for (unsigned i = 0; i < 4; i++) { - if (!InstructionsGroupCandidate[i]) - continue; - const ConstPairs &IGPair = getPairs(TII, - *InstructionsGroupCandidate[i]->getInstr()); - if (!IGPair.isCompatibleWith(MIPair)) - return false; - } - return true; -} - SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) { if (Q.empty()) return NULL; for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end(); It != E; ++It) { SUnit *SU = *It; - if (isBundleable(*SU->getInstr())) { + InstructionsGroupCandidate.push_back(SU->getInstr()); + if (TII->canBundle(InstructionsGroupCandidate)) { + InstructionsGroupCandidate.pop_back(); Q.erase(It); return SU; + } else { + InstructionsGroupCandidate.pop_back(); } } return NULL; @@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() { DEBUG(dbgs() << "New Slot\n"); assert (OccupedSlotsMask && "Slot wasn't filled"); OccupedSlotsMask = 0; - memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate)); + InstructionsGroupCandidate.clear(); LoadAlu(); } @@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() { SUnit *SU = AttemptFillSlot(Chan); if (SU) { OccupedSlotsMask |= (1 << Chan); - InstructionsGroupCandidate[Chan] = SU; + InstructionsGroupCandidate.push_back(SU->getInstr()); return SU; } } diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index d74ff1e076..3d0367fd8e 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -98,7 +98,7 @@ public: virtual void releaseBottomNode(SUnit *SU); private: - SUnit *InstructionsGroupCandidate[4]; + std::vector<MachineInstr *> InstructionsGroupCandidate; int getInstKind(SUnit *SU); bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; @@ -112,7 +112,6 @@ private: void AssignSlot(MachineInstr *MI, unsigned Slot); SUnit* pickAlu(); SUnit* pickOther(int QID); - bool isBundleable(const MachineInstr& MI); void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst); }; diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index ce5994ca36..03f49761ea 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -43,6 +43,37 @@ foreach Index = 0-127 in { Index>; } +// KCACHE_BANK0 +foreach Index = 159-128 in { + foreach Chan = [ "X", "Y", "Z", "W" ] in { + // 32-bit Temporary Registers + def KC0_#Index#_#Chan : R600RegWithChan <"KC0["#Index#"-128]."#Chan, Index, Chan>; + } + // 128-bit Temporary Registers + def KC0_#Index#_XYZW : R600Reg_128 <"KC0["#Index#"-128].XYZW", + [!cast<Register>("KC0_"#Index#"_X"), + !cast<Register>("KC0_"#Index#"_Y"), + !cast<Register>("KC0_"#Index#"_Z"), + !cast<Register>("KC0_"#Index#"_W")], + Index>; +} + +// KCACHE_BANK1 +foreach Index = 191-160 in { + foreach Chan = [ "X", "Y", "Z", "W" ] in { + // 32-bit Temporary Registers + def KC1_#Index#_#Chan : R600RegWithChan <"KC1["#Index#"-160]."#Chan, Index, Chan>; + } + // 128-bit Temporary Registers + def KC1_#Index#_XYZW : R600Reg_128 <"KC1["#Index#"-160].XYZW", + [!cast<Register>("KC1_"#Index#"_X"), + !cast<Register>("KC1_"#Index#"_Y"), + !cast<Register>("KC1_"#Index#"_Z"), + !cast<Register>("KC1_"#Index#"_W")], + Index>; +} + + // Array Base Register holding input in FS foreach Index = 448-480 in { def ArrayBase#Index : R600Reg<"ARRAY_BASE", Index>; @@ -80,6 +111,38 @@ def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", } // End isAllocatable = 0 +def R600_KC0_X : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_X", 128, 159))>; + +def R600_KC0_Y : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_Y", 128, 159))>; + +def R600_KC0_Z : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_Z", 128, 159))>; + +def R600_KC0_W : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC0_%u_W", 128, 159))>; + +def R600_KC0 : RegisterClass <"AMDGPU", [f32, i32], 32, + (interleave R600_KC0_X, R600_KC0_Y, + R600_KC0_Z, R600_KC0_W)>; + +def R600_KC1_X : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_X", 160, 191))>; + +def R600_KC1_Y : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_Y", 160, 191))>; + +def R600_KC1_Z : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_Z", 160, 191))>; + +def R600_KC1_W : RegisterClass <"AMDGPU", [f32, i32], 32, + (add (sequence "KC1_%u_W", 160, 191))>; + +def R600_KC1 : RegisterClass <"AMDGPU", [f32, i32], 32, + (interleave R600_KC1_X, R600_KC1_Y, + R600_KC1_Z, R600_KC1_W)>; + def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add (sequence "T%u_X", 0, 127), AR_X)>; diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 063f5faa63..6f0c307615 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -58,6 +58,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : computeRegisterProperties(); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); + setOperationAction(ISD::ADD, MVT::i64, Legal); setOperationAction(ISD::ADD, MVT::i32, Legal); @@ -69,7 +74,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SETCC); - setSchedulingPreference(Sched::Source); + setSchedulingPreference(Sched::RegPressure); } SDValue SITargetLowering::LowerFormalArguments( @@ -203,32 +208,23 @@ SDValue SITargetLowering::LowerFormalArguments( MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { - MachineRegisterInfo & MRI = BB->getParent()->getRegInfo(); - MachineBasicBlock::iterator I = MI; switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::SI_WQM: - LowerSI_WQM(MI, *BB, I, MRI); - break; } return BB; } -void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const { - BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC); - - MI->eraseFromParent(); -} - EVT SITargetLowering::getSetCCResultType(EVT VT) const { return MVT::i1; } +MVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { + return MVT::i32; +} + //===----------------------------------------------------------------------===// // Custom DAG Lowering Operations //===----------------------------------------------------------------------===// @@ -488,22 +484,23 @@ bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op, MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); SDNode *Node = Op.getNode(); - int OpClass; + const TargetRegisterClass *OpClass; if (MachineSDNode *MN = dyn_cast<MachineSDNode>(Node)) { const MCInstrDesc &Desc = TII->get(MN->getMachineOpcode()); - OpClass = Desc.OpInfo[Op.getResNo()].RegClass; + int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass; + if (OpClassID == -1) + OpClass = getRegClassFor(Op.getSimpleValueType()); + else + OpClass = TRI->getRegClass(OpClassID); } else if (Node->getOpcode() == ISD::CopyFromReg) { RegisterSDNode *Reg = cast<RegisterSDNode>(Node->getOperand(1).getNode()); - OpClass = MRI.getRegClass(Reg->getReg())->getID(); + OpClass = MRI.getRegClass(Reg->getReg()); } else return false; - if (OpClass == -1) - return false; - - return TRI->getRegClass(RegClass)->hasSubClassEq(TRI->getRegClass(OpClass)); + return TRI->getRegClass(RegClass)->hasSubClassEq(OpClass); } /// \brief Make sure that we don't exeed the number of allowed scalars @@ -547,6 +544,13 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, unsigned NumDefs = Desc->getNumDefs(); unsigned NumOps = Desc->getNumOperands(); + // Commuted opcode if available + int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1; + const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev); + + assert(!DescRev || DescRev->getNumDefs() == NumDefs); + assert(!DescRev || DescRev->getNumOperands() == NumOps); + // e64 version if available, -1 otherwise int OpcodeE64 = AMDGPU::getVOPe64(Opcode); const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); @@ -599,41 +603,54 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, // Is this a VSrc or SSrc operand ? unsigned RegClass = Desc->OpInfo[Op].RegClass; - if (!isVSrc(RegClass) && !isSSrc(RegClass)) { + if (isVSrc(RegClass) || isSSrc(RegClass)) { + // Try to fold the immediates + if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { + // Folding didn't worked, make sure we don't hit the SReg limit + ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); + } + continue; + } + + if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) { - if (i == 1 && Desc->isCommutable() && - fitsRegClass(DAG, Ops[0], RegClass) && - foldImm(Ops[1], Immediate, ScalarSlotUsed)) { + unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass; + assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass)); - assert(isVSrc(Desc->OpInfo[NumDefs].RegClass) || - isSSrc(Desc->OpInfo[NumDefs].RegClass)); + // Test if it makes sense to swap operands + if (foldImm(Ops[1], Immediate, ScalarSlotUsed) || + (!fitsRegClass(DAG, Ops[1], RegClass) && + fitsRegClass(DAG, Ops[1], OtherRegClass))) { // Swap commutable operands SDValue Tmp = Ops[1]; Ops[1] = Ops[0]; Ops[0] = Tmp; - } else if (DescE64 && !Immediate) { - // Test if it makes sense to switch to e64 encoding - - RegClass = DescE64->OpInfo[Op].RegClass; - int32_t TmpImm = -1; - if ((isVSrc(RegClass) || isSSrc(RegClass)) && - foldImm(Ops[i], TmpImm, ScalarSlotUsed)) { - - Immediate = -1; - Promote2e64 = true; - Desc = DescE64; - DescE64 = 0; - } + Desc = DescRev; + DescRev = 0; + continue; } - continue; } - // Try to fold the immediates - if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { - // Folding didn't worked, make sure we don't hit the SReg limit - ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); + if (DescE64 && !Immediate) { + + // Test if it makes sense to switch to e64 encoding + unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass; + if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass)) + continue; + + int32_t TmpImm = -1; + if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) || + (!fitsRegClass(DAG, Ops[i], RegClass) && + fitsRegClass(DAG, Ops[1], OtherRegClass))) { + + // Switch to e64 encoding + Immediate = -1; + Promote2e64 = true; + Desc = DescE64; + DescE64 = 0; + } } } @@ -647,10 +664,7 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) Ops.push_back(Node->getOperand(i)); - // Either create a complete new or update the current instruction - if (Promote2e64) - return DAG.getMachineNode(OpcodeE64, Node->getDebugLoc(), - Node->getVTList(), Ops.data(), Ops.size()); - else - return DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + // Create a complete new instruction + return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(), + Node->getVTList(), Ops.data(), Ops.size()); } diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index 0411565ee3..5ad2f40f0f 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -24,9 +24,6 @@ class SITargetLowering : public AMDGPUTargetLowering { const SIInstrInfo * TII; const TargetRegisterInfo * TRI; - void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB, - MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; @@ -48,6 +45,7 @@ public: virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, MachineBasicBlock * BB) const; virtual EVT getSetCCResultType(EVT VT) const; + virtual MVT getScalarShiftAmountTy(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 67fbdf7be1..98bd3dbb66 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -302,21 +302,8 @@ static void increaseCounters(Counters &Dst, const Counters &Src) { Dst.Array[i] = std::max(Dst.Array[i], Src.Array[i]); } -bool SIInsertWaits::unorderedDefines(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::LGKM_CNT) - return true; - - if (TSFlags & SIInstrFlags::EXP_CNT) - return ExpInstrTypesSeen == 3; - - return false; -} - Counters SIInsertWaits::handleOperands(MachineInstr &MI) { - bool UnorderedDefines = unorderedDefines(MI); Counters Result = ZeroCounts; // For each register affected by this @@ -329,8 +316,7 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (Op.isDef()) { increaseCounters(Result, UsedRegs[j]); - if (UnorderedDefines) - increaseCounters(Result, DefinedRegs[j]); + increaseCounters(Result, DefinedRegs[j]); } if (Op.isUse()) diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index de2373b11a..0bfcef562f 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -65,6 +65,26 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Opcode; const int16_t *SubIndices; + if (AMDGPU::M0 == DestReg) { + // Check if M0 isn't already set to this value + for (MachineBasicBlock::reverse_iterator E = MBB.rend(), + I = MachineBasicBlock::reverse_iterator(MI); I != E; ++I) { + + if (!I->definesRegister(AMDGPU::M0)) + continue; + + unsigned Opc = I->getOpcode(); + if (Opc != TargetOpcode::COPY && Opc != AMDGPU::S_MOV_B32) + break; + + if (!I->readsRegister(SrcReg)) + break; + + // The copy isn't necessary + return; + } + } + if (AMDGPU::SReg_32RegClass.contains(DestReg)) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) @@ -138,6 +158,21 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } } +unsigned SIInstrInfo::commuteOpcode(unsigned Opcode) const { + + int NewOpc; + + // Try to map original to commuted opcode + if ((NewOpc = AMDGPU::getCommuteRev(Opcode)) != -1) + return NewOpc; + + // Try to map commuted to original opcode + if ((NewOpc = AMDGPU::getCommuteOrig(Opcode)) != -1) + return NewOpc; + + return Opcode; +} + MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { @@ -145,7 +180,12 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, !MI->getOperand(2).isReg()) return 0; - return TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + + if (MI) + MI->setDesc(get(commuteOpcode(MI->getOpcode()))); + + return MI; } MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg, diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 5789af5d21..d4e60e5086 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -35,6 +35,8 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + unsigned commuteOpcode(unsigned Opcode) const; + virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI=false) const; @@ -76,6 +78,8 @@ public: namespace AMDGPU { int getVOPe64(uint16_t Opcode); + int getCommuteRev(uint16_t Opcode); + int getCommuteOrig(uint16_t Opcode); } // End namespace AMDGPU diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 260c651dd4..617f0b871c 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -115,16 +115,17 @@ class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK < opName#" $dst, $src0", pattern >; -multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass> { +multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass, + RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), - (ins SReg_64:$sbase, i32imm:$offset), + (ins baseClass:$sbase, i32imm:$offset), asm#" $dst, $sbase, $offset", [] >; def _SGPR : SMRD < op, 0, (outs dstClass:$dst), - (ins SReg_64:$sbase, SReg_32:$soff), + (ins baseClass:$sbase, SReg_32:$soff), asm#" $dst, $sbase, $soff", [] >; } @@ -137,6 +138,11 @@ class VOP <string opName> { string OpName = opName; } +class VOP2_REV <string revOp, bit isOrig> { + string RevOp = revOp; + bit IsOrig = isOrig; +} + multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src, string opName, list<dag> pattern> { @@ -165,11 +171,11 @@ multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> : VOP1_Helper <op, VReg_64, VSrc_64, opName, pattern>; multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, - string opName, list<dag> pattern> { + string opName, list<dag> pattern, string revOp> { def _e32 : VOP2 < op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP <opName>; + >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -178,23 +184,26 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc, i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP <opName> { + >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> { let SRC2 = SIOperand.ZERO; } } -multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> - : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern>; +multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> + : VOP2_Helper <op, VReg_32, VSrc_32, opName, pattern, revOp>; -multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> - : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern>; +multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> + : VOP2_Helper <op, VReg_64, VSrc_64, opName, pattern, revOp>; -multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { +multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern, + string revOp = opName> { def _e32 : VOP2 < op, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1), opName#"_e32 $dst, $src0, $src1", pattern - >, VOP <opName>; + >, VOP <opName>, VOP2_REV<revOp#"_e32", !eq(revOp, opName)>; def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -203,7 +212,7 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern> { i32imm:$abs, i32imm:$clamp, i32imm:$omod, i32imm:$neg), opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] - >, VOP <opName> { + >, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> { let SRC2 = SIOperand.ZERO; /* the VOP2 variant puts the carry out into VCC, the VOP3 variant can write it into any SGPR. We currently don't use the carry out, @@ -304,7 +313,7 @@ class MIMG_Load_Helper <bits<7> op, string asm> : MIMG < op, (outs VReg_128:$vdata), (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, - i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_32:$vaddr, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, unknown:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp), asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", @@ -326,4 +335,22 @@ def getVOPe64 : InstrMapping { let ValueCols = [["8"]]; } +// Maps an original opcode to its commuted version +def getCommuteRev : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + +// Maps an commuted opcode to its original version +def getCommuteOrig : InstrMapping { + let FilterClass = "VOP2_REV"; + let RowFields = ["RevOp"]; + let ColFields = ["IsOrig"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 0ab9e4ec0c..4f734f9124 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -403,9 +403,9 @@ def BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMAT //def BUFFER_LOAD_SBYTE : MUBUF_ <0x00000009, "BUFFER_LOAD_SBYTE", []>; //def BUFFER_LOAD_USHORT : MUBUF_ <0x0000000a, "BUFFER_LOAD_USHORT", []>; //def BUFFER_LOAD_SSHORT : MUBUF_ <0x0000000b, "BUFFER_LOAD_SSHORT", []>; -//def BUFFER_LOAD_DWORD : MUBUF_ <0x0000000c, "BUFFER_LOAD_DWORD", []>; -//def BUFFER_LOAD_DWORDX2 : MUBUF_DWORDX2 <0x0000000d, "BUFFER_LOAD_DWORDX2", []>; -//def BUFFER_LOAD_DWORDX4 : MUBUF_DWORDX4 <0x0000000e, "BUFFER_LOAD_DWORDX4", []>; +def BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; +def BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; +def BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; //def BUFFER_STORE_BYTE : MUBUF_ <0x00000018, "BUFFER_STORE_BYTE", []>; //def BUFFER_STORE_SHORT : MUBUF_ <0x0000001a, "BUFFER_STORE_SHORT", []>; //def BUFFER_STORE_DWORD : MUBUF_ <0x0000001c, "BUFFER_STORE_DWORD", []>; @@ -458,17 +458,31 @@ def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORM let mayLoad = 1 in { -defm S_LOAD_DWORD : SMRD_Helper <0x00000000, "S_LOAD_DWORD", SReg_32>; +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SReg_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; -//def S_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000001, "S_LOAD_DWORDX2", []>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x00000002, "S_LOAD_DWORDX4", SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x00000003, "S_LOAD_DWORDX8", SReg_256>; -//def S_LOAD_DWORDX16 : SMRD_DWORDX16 <0x00000004, "S_LOAD_DWORDX16", []>; -//def S_BUFFER_LOAD_DWORD : SMRD_ <0x00000008, "S_BUFFER_LOAD_DWORD", []>; -//def S_BUFFER_LOAD_DWORDX2 : SMRD_DWORDX2 <0x00000009, "S_BUFFER_LOAD_DWORDX2", []>; -//def S_BUFFER_LOAD_DWORDX4 : SMRD_DWORDX4 <0x0000000a, "S_BUFFER_LOAD_DWORDX4", []>; -//def S_BUFFER_LOAD_DWORDX8 : SMRD_DWORDX8 <0x0000000b, "S_BUFFER_LOAD_DWORDX8", []>; -//def S_BUFFER_LOAD_DWORDX16 : SMRD_DWORDX16 <0x0000000c, "S_BUFFER_LOAD_DWORDX16", []>; +defm S_BUFFER_LOAD_DWORD : SMRD_Helper < + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SReg_32 +>; + +defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < + 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 +>; + +defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < + 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 +>; + +defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < + 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 +>; + +defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < + 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 +>; } // mayLoad = 1 @@ -790,13 +804,13 @@ let isCommutable = 1 in { defm V_ADD_F32 : VOP2_32 <0x00000003, "V_ADD_F32", [(set VReg_32:$dst, (fadd VSrc_32:$src0, VReg_32:$src1))] >; -} // End isCommutable = 1 defm V_SUB_F32 : VOP2_32 <0x00000004, "V_SUB_F32", [(set VReg_32:$dst, (fsub VSrc_32:$src0, VReg_32:$src1))] >; +defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", [], "V_SUB_F32">; +} // End isCommutable = 1 -defm V_SUBREV_F32 : VOP2_32 <0x00000005, "V_SUBREV_F32", []>; defm V_MAC_LEGACY_F32 : VOP2_32 <0x00000006, "V_MAC_LEGACY_F32", []>; let isCommutable = 1 in { @@ -834,16 +848,20 @@ defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; -} // End isCommutable = 1 +defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", + [(set VReg_32:$dst, (srl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; +defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; -defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", []>; -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; -defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", []>; -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; -defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", []>; +defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", + [(set VReg_32:$dst, (sra VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; +defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; -let isCommutable = 1 in { +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set VReg_32:$dst, (shl VSrc_32:$src0, (i32 VReg_32:$src1)))] +>; +defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", [(set VReg_32:$dst, (and VSrc_32:$src0, VReg_32:$src1))] @@ -864,25 +882,24 @@ defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; //defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; //defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; //defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; -let Defs = [VCC] in { // Carry-out goes to VCC -let isCommutable = 1 in { +let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [(set VReg_32:$dst, (add (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; -} // End isCommutable = 1 defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [(set VReg_32:$dst, (sub (i32 VSrc_32:$src0), (i32 VReg_32:$src1)))] >; +defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], "V_SUB_I32">; -defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", []>; let Uses = [VCC] in { // Carry-out comes from VCC defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", []>; defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", []>; -defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", []>; +defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], "V_SUBB_U32">; } // End Uses = [VCC] -} // End Defs = [VCC] +} // End isCommutable = 1, Defs = [VCC] + defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; @@ -955,14 +972,31 @@ def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; + +let isCommutable = 1 in { + def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +} // isCommutable = 1 + def : Pat < (mul VSrc_32:$src0, VReg_32:$src1), (V_MUL_LO_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) >; -def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +def : Pat < + (mulhu VSrc_32:$src0, VReg_32:$src1), + (V_MUL_HI_U32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; + +def : Pat < + (mulhs VSrc_32:$src0, VReg_32:$src1), + (V_MUL_HI_I32 VSrc_32:$src0, VReg_32:$src1, (i32 0), 0, 0, 0, 0) +>; + def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; @@ -1051,17 +1085,6 @@ def LOAD_CONST : AMDGPUShaderInst < [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] >; -let usesCustomInserter = 1 in { - -def SI_WQM : InstSI < - (outs), - (ins), - "SI_WQM", - [(int_SI_wqm)] ->; - -} // end usesCustomInserter - // SI Psuedo instructions. These are used by the CFG structurizer pass // and should be lowered to ISA instructions prior to codegen. @@ -1133,6 +1156,31 @@ def SI_KILL : InstSI < } // end mayLoad = 1, mayStore = 1, hasSideEffects = 1 // Uses = [EXEC], Defs = [EXEC] +let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { + +def SI_INDIRECT_SRC : InstSI < + (outs VReg_32:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + "SI_INDIRECT_SRC $dst, $temp, $src, $idx, $off", + [] +>; + +class SI_INDIRECT_DST<RegisterClass rc> : InstSI < + (outs rc:$dst, SReg_64:$temp), + (ins unknown:$src, VSrc_32:$idx, i32imm:$off, VReg_32:$val), + "SI_INDIRECT_DST $dst, $temp, $src, $idx, $off, $val", + [] +> { + let Constraints = "$src = $dst"; +} + +def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>; +def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>; +def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>; +def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>; + +} // Uses = [EXEC,VCC,M0], Defs = [EXEC,VCC,M0] + } // end IsCodeGenOnly, isPseudo def : Pat< @@ -1165,10 +1213,9 @@ def : Pat < /* int_SI_sample for simple 1D texture lookup */ def : Pat < - (int_SI_sample imm:$writemask, (v1i32 VReg_32:$addr), + (int_SI_sample imm:$writemask, VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler, imm), - (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, - (i32 (COPY_TO_REGCLASS VReg_32:$addr, VReg_32)), + (IMAGE_SAMPLE imm:$writemask, 0, 0, 0, 0, 0, 0, 0, VReg_32:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1176,8 +1223,7 @@ class SamplePattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, imm), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1185,8 +1231,7 @@ class SampleRectPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_RECT), - (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 1, 0, 0, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1194,8 +1239,7 @@ class SampleArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1203,8 +1247,7 @@ class SampleShadowPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW), - (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 0, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1212,8 +1255,7 @@ class SampleShadowArrayPattern<Intrinsic name, MIMG opcode, RegisterClass addr_class, ValueType addr_type> : Pat < (name imm:$writemask, (addr_type addr_class:$addr), SReg_256:$rsrc, SReg_128:$sampler, TEX_SHADOW_ARRAY), - (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, - (EXTRACT_SUBREG addr_class:$addr, sub0), + (opcode imm:$writemask, 0, 0, 1, 0, 0, 0, 0, addr_class:$addr, SReg_256:$rsrc, SReg_128:$sampler) >; @@ -1241,22 +1283,83 @@ defm : SamplePatterns<VReg_128, v4i32>; defm : SamplePatterns<VReg_256, v8i32>; defm : SamplePatterns<VReg_512, v16i32>; -def : Extract_Element <f32, v4f32, VReg_128, 0, sub0>; -def : Extract_Element <f32, v4f32, VReg_128, 1, sub1>; -def : Extract_Element <f32, v4f32, VReg_128, 2, sub2>; -def : Extract_Element <f32, v4f32, VReg_128, 3, sub3>; +/********** ============================================ **********/ +/********** Extraction, Insertion, Building and Casting **********/ +/********** ============================================ **********/ -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 4, sub0>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 5, sub1>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 6, sub2>; -def : Insert_Element <f32, v4f32, VReg_32, VReg_128, 7, sub3>; +foreach Index = 0-2 in { + def Extract_Element_v2i32_#Index : Extract_Element < + i32, v2i32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2i32_#Index : Insert_Element < + i32, v2i32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v2f32_#Index : Extract_Element < + f32, v2f32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v2f32_#Index : Insert_Element < + f32, v2f32, VReg_32, VReg_64, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-3 in { + def Extract_Element_v4i32_#Index : Extract_Element < + i32, v4i32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4i32_#Index : Insert_Element < + i32, v4i32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v4f32_#Index : Extract_Element < + f32, v4f32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v4f32_#Index : Insert_Element < + f32, v4f32, VReg_32, VReg_128, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-7 in { + def Extract_Element_v8i32_#Index : Extract_Element < + i32, v8i32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8i32_#Index : Insert_Element < + i32, v8i32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v8f32_#Index : Extract_Element < + f32, v8f32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v8f32_#Index : Insert_Element < + f32, v8f32, VReg_32, VReg_256, Index, !cast<SubRegIndex>(sub#Index) + >; +} + +foreach Index = 0-15 in { + def Extract_Element_v16i32_#Index : Extract_Element < + i32, v16i32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16i32_#Index : Insert_Element < + i32, v16i32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + + def Extract_Element_v16f32_#Index : Extract_Element < + f32, v16f32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; + def Insert_Element_v16f32_#Index : Insert_Element < + f32, v16f32, VReg_32, VReg_512, Index, !cast<SubRegIndex>(sub#Index) + >; +} def : Vector1_Build <v1i32, VReg_32, i32, VReg_32>; def : Vector2_Build <v2i32, VReg_64, i32, VReg_32>; -def : Vector_Build <v4f32, VReg_128, f32, VReg_32>; -def : Vector_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector2_Build <v2f32, VReg_64, f32, VReg_32>; +def : Vector4_Build <v4i32, VReg_128, i32, VReg_32>; +def : Vector4_Build <v4f32, VReg_128, f32, VReg_32>; def : Vector8_Build <v8i32, VReg_256, i32, VReg_32>; +def : Vector8_Build <v8f32, VReg_256, f32, VReg_32>; def : Vector16_Build <v16i32, VReg_512, i32, VReg_32>; +def : Vector16_Build <v16f32, VReg_512, f32, VReg_32>; def : BitConvert <i32, f32, SReg_32>; def : BitConvert <i32, f32, VReg_32>; @@ -1340,8 +1443,7 @@ def : Pat < /********** ================== **********/ /* llvm.AMDGPU.pow */ -/* XXX: We are using IEEE MUL, not the 0 * anything = 0 MUL, is this correct? */ -def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_F32_e32, VReg_32>; +def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32, VReg_32>; def : Pat < (int_AMDGPU_div VSrc_32:$src0, VSrc_32:$src1), @@ -1389,6 +1491,24 @@ def : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 -1), SReg_64:$src0) >; +// 1. Offset as 8bit DWORD immediate +def : Pat < + (int_SI_load_const SReg_128:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM SReg_128:$sbase, IMM8bitDWORD:$offset) +>; + +// 2. Offset loaded in an 32bit SGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR SReg_128:$sbase, (S_MOV_B32 imm:$offset)) +>; + +// 3. Offset in an 32Bit VGPR +def : Pat < + (int_SI_load_const SReg_128:$sbase, VReg_32:$voff), + (BUFFER_LOAD_DWORD 0, 1, 0, 0, 0, 0, VReg_32:$voff, SReg_128:$sbase, 0, 0, 0) +>; + /********** ================== **********/ /********** VOP3 Patterns **********/ /********** ================== **********/ @@ -1426,4 +1546,62 @@ defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; +/********** ====================== **********/ +/********** Indirect adressing **********/ +/********** ====================== **********/ + +multiclass SI_INDIRECT_Pattern <RegisterClass rc, ValueType vt, + SI_INDIRECT_DST IndDst> { + // 1. Extract with offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off)) + >; + + // 2. Extract without offset + def : Pat< + (vector_extract (vt rc:$vec), + (i64 (zext (i32 VReg_32:$idx))) + ), + (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0)) + >; + + // 3. Insert with offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 (add VReg_32:$idx, imm:$off)))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, imm:$off, VReg_32:$val)) + >; + + // 4. Insert without offset + def : Pat< + (vector_insert (vt rc:$vec), (f32 VReg_32:$val), + (i64 (zext (i32 VReg_32:$idx))) + ), + (vt (IndDst (IMPLICIT_DEF), rc:$vec, VReg_32:$idx, 0, VReg_32:$val)) + >; +} + +defm : SI_INDIRECT_Pattern <VReg_64, v2f32, SI_INDIRECT_DST_V2>; +defm : SI_INDIRECT_Pattern <VReg_128, v4f32, SI_INDIRECT_DST_V4>; +defm : SI_INDIRECT_Pattern <VReg_256, v8f32, SI_INDIRECT_DST_V8>; +defm : SI_INDIRECT_Pattern <VReg_512, v16f32, SI_INDIRECT_DST_V16>; + +/********** =============== **********/ +/********** Conditions **********/ +/********** =============== **********/ + +def : Pat< + (i1 (setcc f32:$src0, f32:$src1, SETO)), + (V_CMP_O_F32_e64 f32:$src0, f32:$src1) +>; + +def : Pat< + (i1 (setcc f32:$src0, f32:$src1, SETUO)), + (V_CMP_U_F32_e64 f32:$src0, f32:$src1) +>; + } // End isSI predicate diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 7c23d1706b..0af378edfe 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -16,12 +16,10 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; - /* XXX: We may need a seperate intrinsic here for loading integer values */ - def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; - def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ; - def int_SI_wqm : Intrinsic <[], [], []>; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; - class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; def int_SI_sampleb : Sample; @@ -29,8 +27,8 @@ let TargetPrefix = "SI", isTarget = 1 in { /* Interpolation Intrinsics */ - def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>; - def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>; + def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrNoMem]>; /* Control flow Intrinsics */ diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index b215aa22db..2b60eb9fb3 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -66,6 +66,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; + const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -84,9 +85,14 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void LoadM0(MachineInstr &MI, MachineInstr *MovRel); + void IndirectSrc(MachineInstr &MI); + void IndirectDst(MachineInstr &MI); + public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TII(tm.getInstrInfo()) { } + MachineFunctionPass(ID), TRI(tm.getRegisterInfo()), + TII(tm.getInstrInfo()) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -191,7 +197,8 @@ void SILowerControlFlowPass::Else(MachineInstr &MI) { unsigned Dst = MI.getOperand(0).getReg(); unsigned Src = MI.getOperand(1).getReg(); - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) + BuildMI(MBB, MBB.getFirstNonPHI(), DL, + TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst) .addReg(Src); // Saved EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) @@ -302,9 +309,108 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + MachineBasicBlock::iterator I = MI; + + unsigned Save = MI.getOperand(1).getReg(); + unsigned Idx = MI.getOperand(3).getReg(); + + if (AMDGPU::SReg_32RegClass.contains(Idx)) { + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(Idx); + MBB.insert(I, MovRel); + MI.eraseFromParent(); + return; + } + + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); + + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); + + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32_e32), AMDGPU::VCC) + .addReg(Idx); + + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC); + + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); + + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); + + // Do the actual move + MBB.insert(I, MovRel); + + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); + + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); + + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + + MI.eraseFromParent(); +} + +void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Vec = MI.getOperand(2).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) + .addReg(TRI->getSubReg(Vec, AMDGPU::sub0) + Off) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Vec, RegState::Implicit); + + LoadM0(MI, MovRel); +} + +void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { + + MachineBasicBlock &MBB = *MI.getParent(); + DebugLoc DL = MI.getDebugLoc(); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Off = MI.getOperand(4).getImm(); + unsigned Val = MI.getOperand(5).getReg(); + + MachineInstr *MovRel = + BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) + .addReg(TRI->getSubReg(Dst, AMDGPU::sub0) + Off, RegState::Define) + .addReg(Val) + .addReg(AMDGPU::M0, RegState::Implicit) + .addReg(Dst, RegState::Implicit); + + LoadM0(MI, MovRel); +} + bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool HaveKill = false; + bool NeedWQM = false; unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); @@ -363,9 +469,33 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::S_BRANCH: Branch(MI); break; + + case AMDGPU::SI_INDIRECT_SRC: + IndirectSrc(MI); + break; + + case AMDGPU::SI_INDIRECT_DST_V2: + case AMDGPU::SI_INDIRECT_DST_V4: + case AMDGPU::SI_INDIRECT_DST_V8: + case AMDGPU::SI_INDIRECT_DST_V16: + IndirectDst(MI); + break; + + case AMDGPU::V_INTERP_P1_F32: + case AMDGPU::V_INTERP_P2_F32: + case AMDGPU::V_INTERP_MOV_F32: + NeedWQM = true; + break; + } } } + if (NeedWQM) { + MachineBasicBlock &MBB = MF.front(); + BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64), + AMDGPU::EXEC).addReg(AMDGPU::EXEC); + } + return true; } diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp index 1a4e4cbbbb..ee0e30755f 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -10,25 +10,9 @@ #include "SIMachineFunctionInfo.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Function.h" using namespace llvm; -const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType"; - SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) - : MachineFunctionInfo(), - ShaderType(0), - PSInputAddr(0) { - - AttributeSet Set = MF.getFunction()->getAttributes(); - Attribute A = Set.getAttribute(AttributeSet::FunctionIndex, - ShaderTypeAttribute); - - if (A.isStringAttribute()) { - StringRef Str = A.getValueAsString(); - if (Str.getAsInteger(0, ShaderType)) - llvm_unreachable("Can't parse shader type!"); - } -} + : AMDGPUMachineFunction(MF), + PSInputAddr(0) { } diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 91a809b124..6da9f7f9a1 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -15,18 +15,15 @@ #ifndef SIMACHINEFUNCTIONINFO_H_ #define SIMACHINEFUNCTIONINFO_H_ -#include "llvm/CodeGen/MachineFunction.h" +#include "AMDGPUMachineFunction.h" namespace llvm { /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. -class SIMachineFunctionInfo : public MachineFunctionInfo { +class SIMachineFunctionInfo : public AMDGPUMachineFunction { public: - static const char *ShaderTypeAttribute; - SIMachineFunctionInfo(const MachineFunction &MF); - unsigned ShaderType; unsigned PSInputAddr; }; diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 88275c523f..99278ae8dc 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -30,6 +30,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } +unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + return RC->getNumRegs(); +} + const TargetRegisterClass * SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { switch (rc->getID()) { diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 40171e4450..caec228413 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -31,6 +31,9 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { virtual BitVector getReservedRegs(const MachineFunction &MF) const; + virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// \param RC is an AMDIL reg class. /// /// \returns the SI register class that is equivalent to \p RC. diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 3dcad506d2..4f14931a9c 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -158,15 +158,15 @@ def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>; def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>; // Register class for all vector registers (VGPRs + Interploation Registers) -def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>; +def VReg_32 : RegisterClass<"AMDGPU", [i32, f32, v1i32], 32, (add VGPR_32)>; -def VReg_64 : RegisterClass<"AMDGPU", [i64, v2i32], 64, (add VGPR_64)>; +def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>; -def VReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; -def VReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add VGPR_256)>; +def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 256, (add VGPR_256)>; -def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; +def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; //===----------------------------------------------------------------------===// // [SV]Src_* register classes, can have either an immediate or an register @@ -174,9 +174,9 @@ def VReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add VGPR_512)>; def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; -def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>; +def SSrc_64 : RegisterClass<"AMDGPU", [i64, f64, i1], 64, (add SReg_64)>; def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; -def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>; +def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 90b698d507..c3810b2a4d 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -182,11 +182,11 @@ multiclass F3_12<string OpcStr, bits<6> Op3Val, SDNode OpNode> { def rr : F3_1<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>; + [(set i32:$dst, (OpNode i32:$b, i32:$c))]>; def ri : F3_2<2, Op3Val, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), !strconcat(OpcStr, " $b, $c, $dst"), - [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>; + [(set i32:$dst, (OpNode i32:$b, simm13:$c))]>; } /// F3_12np multiclass - Define a normal F3_1/F3_2 pattern in one shot, with no @@ -243,10 +243,10 @@ let Predicates = [HasNoV9] in { // Only emit these in V8 mode. "!FpMOVD $src, $dst", []>; def FpNEGD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), "!FpNEGD $src, $dst", - [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; + [(set f64:$dst, (fneg f64:$src))]>; def FpABSD : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$src), "!FpABSD $src, $dst", - [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; + [(set f64:$dst, (fabs f64:$src))]>; } // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after @@ -257,19 +257,16 @@ let Uses = [ICC], usesCustomInserter = 1 in { def SELECT_CC_Int_ICC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_ICC PSEUDO!", - [(set IntRegs:$dst, (SPselecticc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; + [(set i32:$dst, (SPselecticc i32:$T, i32:$F, imm:$Cond))]>; def SELECT_CC_FP_ICC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_ICC PSEUDO!", - [(set FPRegs:$dst, (SPselecticc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + [(set f32:$dst, (SPselecticc f32:$T, f32:$F, imm:$Cond))]>; def SELECT_CC_DFP_ICC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_ICC PSEUDO!", - [(set DFPRegs:$dst, (SPselecticc DFPRegs:$T, DFPRegs:$F, - imm:$Cond))]>; + [(set f64:$dst, (SPselecticc f64:$T, f64:$F, imm:$Cond))]>; } let usesCustomInserter = 1, Uses = [FCC] in { @@ -277,19 +274,16 @@ let usesCustomInserter = 1, Uses = [FCC] in { def SELECT_CC_Int_FCC : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, i32imm:$Cond), "; SELECT_CC_Int_FCC PSEUDO!", - [(set IntRegs:$dst, (SPselectfcc IntRegs:$T, IntRegs:$F, - imm:$Cond))]>; + [(set i32:$dst, (SPselectfcc i32:$T, i32:$F, imm:$Cond))]>; def SELECT_CC_FP_FCC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, i32imm:$Cond), "; SELECT_CC_FP_FCC PSEUDO!", - [(set FPRegs:$dst, (SPselectfcc FPRegs:$T, FPRegs:$F, - imm:$Cond))]>; + [(set f32:$dst, (SPselectfcc f32:$T, f32:$F, imm:$Cond))]>; def SELECT_CC_DFP_FCC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, i32imm:$Cond), "; SELECT_CC_DFP_FCC PSEUDO!", - [(set DFPRegs:$dst, (SPselectfcc DFPRegs:$T, DFPRegs:$F, - imm:$Cond))]>; + [(set f64:$dst, (SPselectfcc f64:$T, f64:$F, imm:$Cond))]>; } @@ -309,111 +303,111 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { def LDSBrr : F3_1<3, 0b001001, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldsb [$addr], $dst", - [(set IntRegs:$dst, (sextloadi8 ADDRrr:$addr))]>; + [(set i32:$dst, (sextloadi8 ADDRrr:$addr))]>; def LDSBri : F3_2<3, 0b001001, (outs IntRegs:$dst), (ins MEMri:$addr), "ldsb [$addr], $dst", - [(set IntRegs:$dst, (sextloadi8 ADDRri:$addr))]>; + [(set i32:$dst, (sextloadi8 ADDRri:$addr))]>; def LDSHrr : F3_1<3, 0b001010, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldsh [$addr], $dst", - [(set IntRegs:$dst, (sextloadi16 ADDRrr:$addr))]>; + [(set i32:$dst, (sextloadi16 ADDRrr:$addr))]>; def LDSHri : F3_2<3, 0b001010, (outs IntRegs:$dst), (ins MEMri:$addr), "ldsh [$addr], $dst", - [(set IntRegs:$dst, (sextloadi16 ADDRri:$addr))]>; + [(set i32:$dst, (sextloadi16 ADDRri:$addr))]>; def LDUBrr : F3_1<3, 0b000001, (outs IntRegs:$dst), (ins MEMrr:$addr), "ldub [$addr], $dst", - [(set IntRegs:$dst, (zextloadi8 ADDRrr:$addr))]>; + [(set i32:$dst, (zextloadi8 ADDRrr:$addr))]>; def LDUBri : F3_2<3, 0b000001, (outs IntRegs:$dst), (ins MEMri:$addr), "ldub [$addr], $dst", - [(set IntRegs:$dst, (zextloadi8 ADDRri:$addr))]>; + [(set i32:$dst, (zextloadi8 ADDRri:$addr))]>; def LDUHrr : F3_1<3, 0b000010, (outs IntRegs:$dst), (ins MEMrr:$addr), "lduh [$addr], $dst", - [(set IntRegs:$dst, (zextloadi16 ADDRrr:$addr))]>; + [(set i32:$dst, (zextloadi16 ADDRrr:$addr))]>; def LDUHri : F3_2<3, 0b000010, (outs IntRegs:$dst), (ins MEMri:$addr), "lduh [$addr], $dst", - [(set IntRegs:$dst, (zextloadi16 ADDRri:$addr))]>; + [(set i32:$dst, (zextloadi16 ADDRri:$addr))]>; def LDrr : F3_1<3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRrr:$addr))]>; + [(set i32:$dst, (load ADDRrr:$addr))]>; def LDri : F3_2<3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set IntRegs:$dst, (load ADDRri:$addr))]>; + [(set i32:$dst, (load ADDRri:$addr))]>; // Section B.2 - Load Floating-point Instructions, p. 92 def LDFrr : F3_1<3, 0b100000, (outs FPRegs:$dst), (ins MEMrr:$addr), "ld [$addr], $dst", - [(set FPRegs:$dst, (load ADDRrr:$addr))]>; + [(set f32:$dst, (load ADDRrr:$addr))]>; def LDFri : F3_2<3, 0b100000, (outs FPRegs:$dst), (ins MEMri:$addr), "ld [$addr], $dst", - [(set FPRegs:$dst, (load ADDRri:$addr))]>; + [(set f32:$dst, (load ADDRri:$addr))]>; def LDDFrr : F3_1<3, 0b100011, (outs DFPRegs:$dst), (ins MEMrr:$addr), "ldd [$addr], $dst", - [(set DFPRegs:$dst, (load ADDRrr:$addr))]>; + [(set f64:$dst, (load ADDRrr:$addr))]>; def LDDFri : F3_2<3, 0b100011, (outs DFPRegs:$dst), (ins MEMri:$addr), "ldd [$addr], $dst", - [(set DFPRegs:$dst, (load ADDRri:$addr))]>; + [(set f64:$dst, (load ADDRri:$addr))]>; // Section B.4 - Store Integer Instructions, p. 95 def STBrr : F3_1<3, 0b000101, (outs), (ins MEMrr:$addr, IntRegs:$src), "stb $src, [$addr]", - [(truncstorei8 IntRegs:$src, ADDRrr:$addr)]>; + [(truncstorei8 i32:$src, ADDRrr:$addr)]>; def STBri : F3_2<3, 0b000101, (outs), (ins MEMri:$addr, IntRegs:$src), "stb $src, [$addr]", - [(truncstorei8 IntRegs:$src, ADDRri:$addr)]>; + [(truncstorei8 i32:$src, ADDRri:$addr)]>; def STHrr : F3_1<3, 0b000110, (outs), (ins MEMrr:$addr, IntRegs:$src), "sth $src, [$addr]", - [(truncstorei16 IntRegs:$src, ADDRrr:$addr)]>; + [(truncstorei16 i32:$src, ADDRrr:$addr)]>; def STHri : F3_2<3, 0b000110, (outs), (ins MEMri:$addr, IntRegs:$src), "sth $src, [$addr]", - [(truncstorei16 IntRegs:$src, ADDRri:$addr)]>; + [(truncstorei16 i32:$src, ADDRri:$addr)]>; def STrr : F3_1<3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), "st $src, [$addr]", - [(store IntRegs:$src, ADDRrr:$addr)]>; + [(store i32:$src, ADDRrr:$addr)]>; def STri : F3_2<3, 0b000100, (outs), (ins MEMri:$addr, IntRegs:$src), "st $src, [$addr]", - [(store IntRegs:$src, ADDRri:$addr)]>; + [(store i32:$src, ADDRri:$addr)]>; // Section B.5 - Store Floating-point Instructions, p. 97 def STFrr : F3_1<3, 0b100100, (outs), (ins MEMrr:$addr, FPRegs:$src), "st $src, [$addr]", - [(store FPRegs:$src, ADDRrr:$addr)]>; + [(store f32:$src, ADDRrr:$addr)]>; def STFri : F3_2<3, 0b100100, (outs), (ins MEMri:$addr, FPRegs:$src), "st $src, [$addr]", - [(store FPRegs:$src, ADDRri:$addr)]>; + [(store f32:$src, ADDRri:$addr)]>; def STDFrr : F3_1<3, 0b100111, (outs), (ins MEMrr:$addr, DFPRegs:$src), "std $src, [$addr]", - [(store DFPRegs:$src, ADDRrr:$addr)]>; + [(store f64:$src, ADDRrr:$addr)]>; def STDFri : F3_2<3, 0b100111, (outs), (ins MEMri:$addr, DFPRegs:$src), "std $src, [$addr]", - [(store DFPRegs:$src, ADDRri:$addr)]>; + [(store f64:$src, ADDRri:$addr)]>; // Section B.9 - SETHI Instruction, p. 104 def SETHIi: F2_1<0b100, (outs IntRegs:$dst), (ins i32imm:$src), "sethi $src, $dst", - [(set IntRegs:$dst, SETHIimm:$src)]>; + [(set i32:$dst, SETHIimm:$src)]>; // Section B.10 - NOP Instruction, p. 105 // (It's a special case of SETHI) @@ -426,7 +420,7 @@ defm AND : F3_12<"and", 0b000001, and>; def ANDNrr : F3_1<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "andn $b, $c, $dst", - [(set IntRegs:$dst, (and IntRegs:$b, (not IntRegs:$c)))]>; + [(set i32:$dst, (and i32:$b, (not i32:$c)))]>; def ANDNri : F3_2<2, 0b000101, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "andn $b, $c, $dst", []>; @@ -436,7 +430,7 @@ defm OR : F3_12<"or", 0b000010, or>; def ORNrr : F3_1<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "orn $b, $c, $dst", - [(set IntRegs:$dst, (or IntRegs:$b, (not IntRegs:$c)))]>; + [(set i32:$dst, (or i32:$b, (not i32:$c)))]>; def ORNri : F3_2<2, 0b000110, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "orn $b, $c, $dst", []>; @@ -445,7 +439,7 @@ defm XOR : F3_12<"xor", 0b000011, xor>; def XNORrr : F3_1<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c), "xnor $b, $c, $dst", - [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>; + [(set i32:$dst, (not (xor i32:$b, i32:$c)))]>; def XNORri : F3_2<2, 0b000111, (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c), "xnor $b, $c, $dst", []>; @@ -462,7 +456,7 @@ defm ADD : F3_12<"add", 0b000000, add>; def LEA_ADDri : F3_2<2, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr), "add ${addr:arith}, $dst", - [(set IntRegs:$dst, ADDRri:$addr)]>; + [(set i32:$dst, ADDRri:$addr)]>; let Defs = [ICC] in defm ADDCC : F3_12<"addcc", 0b010000, addc>; @@ -603,11 +597,11 @@ def FDTOI : F3_3<2, 0b110100, 0b011010010, def FSTOD : F3_3<2, 0b110100, 0b011001001, (outs DFPRegs:$dst), (ins FPRegs:$src), "fstod $src, $dst", - [(set DFPRegs:$dst, (fextend FPRegs:$src))]>; + [(set f64:$dst, (fextend f32:$src))]>; def FDTOS : F3_3<2, 0b110100, 0b011000110, (outs FPRegs:$dst), (ins DFPRegs:$src), "fdtos $src, $dst", - [(set FPRegs:$dst, (fround DFPRegs:$src))]>; + [(set f32:$dst, (fround f64:$src))]>; // Floating-point Move Instructions, p. 144 def FMOVS : F3_3<2, 0b110100, 0b000000001, @@ -616,22 +610,22 @@ def FMOVS : F3_3<2, 0b110100, 0b000000001, def FNEGS : F3_3<2, 0b110100, 0b000000101, (outs FPRegs:$dst), (ins FPRegs:$src), "fnegs $src, $dst", - [(set FPRegs:$dst, (fneg FPRegs:$src))]>; + [(set f32:$dst, (fneg f32:$src))]>; def FABSS : F3_3<2, 0b110100, 0b000001001, (outs FPRegs:$dst), (ins FPRegs:$src), "fabss $src, $dst", - [(set FPRegs:$dst, (fabs FPRegs:$src))]>; + [(set f32:$dst, (fabs f32:$src))]>; // Floating-point Square Root Instructions, p.145 def FSQRTS : F3_3<2, 0b110100, 0b000101001, (outs FPRegs:$dst), (ins FPRegs:$src), "fsqrts $src, $dst", - [(set FPRegs:$dst, (fsqrt FPRegs:$src))]>; + [(set f32:$dst, (fsqrt f32:$src))]>; def FSQRTD : F3_3<2, 0b110100, 0b000101010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fsqrtd $src, $dst", - [(set DFPRegs:$dst, (fsqrt DFPRegs:$src))]>; + [(set f64:$dst, (fsqrt f64:$src))]>; @@ -639,42 +633,42 @@ def FSQRTD : F3_3<2, 0b110100, 0b000101010, def FADDS : F3_3<2, 0b110100, 0b001000001, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fadds $src1, $src2, $dst", - [(set FPRegs:$dst, (fadd FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fadd f32:$src1, f32:$src2))]>; def FADDD : F3_3<2, 0b110100, 0b001000010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "faddd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fadd DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fadd f64:$src1, f64:$src2))]>; def FSUBS : F3_3<2, 0b110100, 0b001000101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsubs $src1, $src2, $dst", - [(set FPRegs:$dst, (fsub FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fsub f32:$src1, f32:$src2))]>; def FSUBD : F3_3<2, 0b110100, 0b001000110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fsubd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fsub DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fsub f64:$src1, f64:$src2))]>; // Floating-point Multiply and Divide Instructions, p. 147 def FMULS : F3_3<2, 0b110100, 0b001001001, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fmuls $src1, $src2, $dst", - [(set FPRegs:$dst, (fmul FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fmul f32:$src1, f32:$src2))]>; def FMULD : F3_3<2, 0b110100, 0b001001010, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fmuld $src1, $src2, $dst", - [(set DFPRegs:$dst, (fmul DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fmul f64:$src1, f64:$src2))]>; def FSMULD : F3_3<2, 0b110100, 0b001101001, (outs DFPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fsmuld $src1, $src2, $dst", - [(set DFPRegs:$dst, (fmul (fextend FPRegs:$src1), - (fextend FPRegs:$src2)))]>; + [(set f64:$dst, (fmul (fextend f32:$src1), + (fextend f32:$src2)))]>; def FDIVS : F3_3<2, 0b110100, 0b001001101, (outs FPRegs:$dst), (ins FPRegs:$src1, FPRegs:$src2), "fdivs $src1, $src2, $dst", - [(set FPRegs:$dst, (fdiv FPRegs:$src1, FPRegs:$src2))]>; + [(set f32:$dst, (fdiv f32:$src1, f32:$src2))]>; def FDIVD : F3_3<2, 0b110100, 0b001001110, (outs DFPRegs:$dst), (ins DFPRegs:$src1, DFPRegs:$src2), "fdivd $src1, $src2, $dst", - [(set DFPRegs:$dst, (fdiv DFPRegs:$src1, DFPRegs:$src2))]>; + [(set f64:$dst, (fdiv f64:$src1, f64:$src2))]>; // Floating-point Compare Instructions, p. 148 // Note: the 2nd template arg is different for these guys. @@ -685,11 +679,11 @@ let Defs = [FCC] in { def FCMPS : F3_3<2, 0b110101, 0b001010001, (outs), (ins FPRegs:$src1, FPRegs:$src2), "fcmps $src1, $src2\n\tnop", - [(SPcmpfcc FPRegs:$src1, FPRegs:$src2)]>; + [(SPcmpfcc f32:$src1, f32:$src2)]>; def FCMPD : F3_3<2, 0b110101, 0b001010010, (outs), (ins DFPRegs:$src1, DFPRegs:$src2), "fcmpd $src1, $src2\n\tnop", - [(SPcmpfcc DFPRegs:$src1, DFPRegs:$src2)]>; + [(SPcmpfcc f64:$src1, f64:$src2)]>; } //===----------------------------------------------------------------------===// @@ -704,52 +698,45 @@ let Predicates = [HasV9], Constraints = "$T = $dst" in { def MOVICCrr : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselecticc i32:$F, i32:$T, imm:$cc))]>; def MOVICCri : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), "mov$cc %icc, $F, $dst", - [(set IntRegs:$dst, - (SPselecticc simm11:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselecticc simm11:$F, i32:$T, imm:$cc))]>; } let Uses = [FCC] in { def MOVFCCrr : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, IntRegs:$F, CCOp:$cc), "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc IntRegs:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselectfcc i32:$F, i32:$T, imm:$cc))]>; def MOVFCCri : Pseudo<(outs IntRegs:$dst), (ins IntRegs:$T, i32imm:$F, CCOp:$cc), "mov$cc %fcc0, $F, $dst", - [(set IntRegs:$dst, - (SPselectfcc simm11:$F, IntRegs:$T, imm:$cc))]>; + [(set i32:$dst, (SPselectfcc simm11:$F, i32:$T, imm:$cc))]>; } let Uses = [ICC] in { def FMOVS_ICC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), "fmovs$cc %icc, $F, $dst", - [(set FPRegs:$dst, - (SPselecticc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + [(set f32:$dst, + (SPselecticc f32:$F, f32:$T, imm:$cc))]>; def FMOVD_ICC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), "fmovd$cc %icc, $F, $dst", - [(set DFPRegs:$dst, - (SPselecticc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + [(set f64:$dst, (SPselecticc f64:$F, f64:$T, imm:$cc))]>; } let Uses = [FCC] in { def FMOVS_FCC : Pseudo<(outs FPRegs:$dst), (ins FPRegs:$T, FPRegs:$F, CCOp:$cc), "fmovs$cc %fcc0, $F, $dst", - [(set FPRegs:$dst, - (SPselectfcc FPRegs:$F, FPRegs:$T, imm:$cc))]>; + [(set f32:$dst, (SPselectfcc f32:$F, f32:$T, imm:$cc))]>; def FMOVD_FCC : Pseudo<(outs DFPRegs:$dst), (ins DFPRegs:$T, DFPRegs:$F, CCOp:$cc), "fmovd$cc %fcc0, $F, $dst", - [(set DFPRegs:$dst, - (SPselectfcc DFPRegs:$F, DFPRegs:$T, imm:$cc))]>; + [(set f64:$dst, (SPselectfcc f64:$F, f64:$T, imm:$cc))]>; } } @@ -762,11 +749,11 @@ let Predicates = [HasV9] in { def FNEGD : F3_3<2, 0b110100, 0b000000110, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fnegd $src, $dst", - [(set DFPRegs:$dst, (fneg DFPRegs:$src))]>; + [(set f64:$dst, (fneg f64:$src))]>; def FABSD : F3_3<2, 0b110100, 0b000001010, (outs DFPRegs:$dst), (ins DFPRegs:$src), "fabsd $src, $dst", - [(set DFPRegs:$dst, (fabs DFPRegs:$src))]>; + [(set f64:$dst, (fabs f64:$src))]>; } // POPCrr - This does a ctpop of a 64-bit register. As such, we have to clear @@ -774,8 +761,8 @@ let Predicates = [HasV9] in { def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$dst), (ins IntRegs:$src), "popc $src, $dst", []>, Requires<[HasV9]>; -def : Pat<(ctpop IntRegs:$src), - (POPCrr (SLLri IntRegs:$src, 0))>; +def : Pat<(ctpop i32:$src), + (POPCrr (SLLri $src, 0))>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -789,10 +776,10 @@ def : Pat<(i32 imm:$val), (ORri (SETHIi (HI22 imm:$val)), (LO10 imm:$val))>; // subc -def : Pat<(subc IntRegs:$b, IntRegs:$c), - (SUBCCrr IntRegs:$b, IntRegs:$c)>; -def : Pat<(subc IntRegs:$b, simm13:$val), - (SUBCCri IntRegs:$b, imm:$val)>; +def : Pat<(subc i32:$b, i32:$c), + (SUBCCrr $b, $c)>; +def : Pat<(subc i32:$b, simm13:$val), + (SUBCCri $b, imm:$val)>; // Global addresses, constant pool entries def : Pat<(SPhi tglobaladdr:$in), (SETHIi tglobaladdr:$in)>; @@ -801,10 +788,10 @@ def : Pat<(SPhi tconstpool:$in), (SETHIi tconstpool:$in)>; def : Pat<(SPlo tconstpool:$in), (ORri G0, tconstpool:$in)>; // Add reg, lo. This is used when taking the addr of a global/constpool entry. -def : Pat<(add IntRegs:$r, (SPlo tglobaladdr:$in)), - (ADDri IntRegs:$r, tglobaladdr:$in)>; -def : Pat<(add IntRegs:$r, (SPlo tconstpool:$in)), - (ADDri IntRegs:$r, tconstpool:$in)>; +def : Pat<(add i32:$r, (SPlo tglobaladdr:$in)), + (ADDri $r, tglobaladdr:$in)>; +def : Pat<(add i32:$r, (SPlo tconstpool:$in)), + (ADDri $r, tconstpool:$in)>; // Calls: def : Pat<(call tglobaladdr:$dst), diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 357879bf6c..b53a1ed095 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -40,7 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { int SPAdj, unsigned FIOperandNum, RegScavenger *RS = NULL) const; - void processFunctionBeforeFrameFinalized(MachineFunction &MF) const; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS = NULL) const; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const; diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index c6cafe59eb..ee88ce77c0 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -610,6 +610,9 @@ struct StringComparator { // Provided for compatibility with MSVC's debug mode. bool operator()(StringRef LHS, const char *RHS) const { return LHS < RHS; } bool operator()(StringRef LHS, StringRef RHS) const { return LHS < RHS; } + bool operator()(const char *LHS, const char *RHS) const { + return std::strcmp(LHS, RHS) < 0; + } }; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 7d8b49cdf2..e7282519d5 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" @@ -61,6 +63,30 @@ TargetMachine::~TargetMachine() { delete AsmInfo; } +/// \brief Reset the target options based on the function's attributes. +void TargetMachine::resetTargetOptions(const MachineFunction *MF) const { + const Function *F = MF->getFunction(); + TargetOptions &TO = MF->getTarget().Options; + +#define RESET_OPTION(X, Y) \ + do { \ + if (F->hasFnAttribute(Y)) \ + TO.X = \ + (F->getAttributes(). \ + getAttribute(AttributeSet::FunctionIndex, \ + Y).getValueAsString() == "true"); \ + } while (0) + + RESET_OPTION(NoFramePointerElim, "no-frame-pointer-elim"); + RESET_OPTION(NoFramePointerElimNonLeaf, "no-frame-pointer-elim-non-leaf"); + RESET_OPTION(LessPreciseFPMADOption, "less-precise-fpmad"); + RESET_OPTION(UnsafeFPMath, "unsafe-fp-math"); + RESET_OPTION(NoInfsFPMath, "no-infs-fp-math"); + RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); + RESET_OPTION(UseSoftFloat, "use-soft-float"); + RESET_OPTION(DisableTailCalls, "disable-tail-calls"); +} + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. Reloc::Model TargetMachine::getRelocationModel() const { diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index ee5c2b2bfd..75d26f55c3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -58,10 +58,15 @@ private: X86Operand *ParseIntelOperand(); X86Operand *ParseIntelOffsetOfOperator(SMLoc StartLoc); X86Operand *ParseIntelOperator(SMLoc StartLoc, unsigned OpKind); - X86Operand *ParseIntelMemOperand(unsigned SegReg, SMLoc StartLoc); - X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size); + X86Operand *ParseIntelMemOperand(unsigned SegReg, uint64_t ImmDisp, + SMLoc StartLoc); + X86Operand *ParseIntelBracExpression(unsigned SegReg, uint64_t ImmDisp, + unsigned Size); X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + X86Operand *CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, SMLoc End, + SMLoc SizeDirLoc, unsigned Size); + bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr **NewDisp, SmallString<64> &Err); @@ -170,30 +175,33 @@ struct X86Operand : public MCParsedAsmOperand { SMLoc OffsetOfLoc; bool AddressOf; + struct TokOp { + const char *Data; + unsigned Length; + }; + + struct RegOp { + unsigned RegNo; + }; + + struct ImmOp { + const MCExpr *Val; + }; + + struct MemOp { + unsigned SegReg; + const MCExpr *Disp; + unsigned BaseReg; + unsigned IndexReg; + unsigned Scale; + unsigned Size; + }; + union { - struct { - const char *Data; - unsigned Length; - } Tok; - - struct { - unsigned RegNo; - } Reg; - - struct { - const MCExpr *Val; - bool NeedAsmRewrite; - } Imm; - - struct { - unsigned SegReg; - const MCExpr *Disp; - unsigned BaseReg; - unsigned IndexReg; - unsigned Scale; - unsigned Size; - bool NeedSizeDir; - } Mem; + struct TokOp Tok; + struct RegOp Reg; + struct ImmOp Imm; + struct MemOp Mem; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -231,11 +239,6 @@ struct X86Operand : public MCParsedAsmOperand { return Imm.Val; } - bool needAsmRewrite() const { - assert(Kind == Immediate && "Invalid access!"); - return Imm.NeedAsmRewrite; - } - const MCExpr *getMemDisp() const { assert(Kind == Memory && "Invalid access!"); return Mem.Disp; @@ -332,11 +335,6 @@ struct X86Operand : public MCParsedAsmOperand { return isImmSExti64i32Value(CE->getValue()); } - unsigned getMemSize() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.Size; - } - bool isOffsetOf() const { return OffsetOfLoc.getPointer(); } @@ -345,11 +343,6 @@ struct X86Operand : public MCParsedAsmOperand { return AddressOf; } - bool needSizeDirective() const { - assert(Kind == Memory && "Invalid access!"); - return Mem.NeedSizeDir; - } - bool isMem() const { return Kind == Memory; } bool isMem8() const { return Kind == Memory && (!Mem.Size || Mem.Size == 8); @@ -485,17 +478,15 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc, - bool NeedRewrite = true){ + static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; - Res->Imm.NeedAsmRewrite = NeedRewrite; return Res; } /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -503,7 +494,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = 0; Res->Mem.Scale = 1; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -512,7 +502,7 @@ struct X86Operand : public MCParsedAsmOperand { static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, bool NeedSizeDir = false) { + unsigned Size = 0) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -527,7 +517,6 @@ struct X86Operand : public MCParsedAsmOperand { Res->Mem.IndexReg = IndexReg; Res->Mem.Scale = Scale; Res->Mem.Size = Size; - Res->Mem.NeedSizeDir = NeedSizeDir; Res->AddressOf = false; return Res; } @@ -711,8 +700,8 @@ class IntelBracExprStateMachine { bool isPlus; public: - IntelBracExprStateMachine(MCAsmParser &parser) : - State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(0), + IntelBracExprStateMachine(MCAsmParser &parser, int64_t disp) : + State(IBES_START), BaseReg(0), IndexReg(0), Scale(1), Disp(disp), TmpReg(0), TmpInteger(0), isPlus(true) {} unsigned getBaseReg() { return BaseReg; } @@ -890,7 +879,47 @@ public: } }; -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, +X86Operand *X86AsmParser::CreateMemForInlineAsm(const MCExpr *Disp, SMLoc Start, + SMLoc End, SMLoc SizeDirLoc, + unsigned Size) { + bool NeedSizeDir = false; + bool IsVarDecl = false; + if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { + const MCSymbol &Sym = SymRef->getSymbol(); + // FIXME: The SemaLookup will fail if the name is anything other then an + // identifier. + // FIXME: Pass a valid SMLoc. + unsigned tLength, tSize, tType; + SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, + tSize, tType, IsVarDecl); + if (!Size) { + Size = tType * 8; // Size is in terms of bits in this context. + NeedSizeDir = Size > 0; + } + } + + // If this is not a VarDecl then assume it is a FuncDecl or some other label + // reference. We need an 'r' constraint here, so we need to create register + // operand to ensure proper matching. Just pick a GPR based on the size of + // a pointer. + if (!IsVarDecl) { + unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; + return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); + } + + if (NeedSizeDir) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, SizeDirLoc, + /*Len*/0, Size)); + + // When parsing inline assembly we set the base register to a non-zero value + // as we don't know the actual value at this time. This is necessary to + // get the matching correct in some cases. + return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, + /*Scale*/1, Start, End, Size); +} + +X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, + uint64_t ImmDisp, unsigned Size) { const AsmToken &Tok = Parser.getTok(); SMLoc Start = Tok.getLoc(), End = Tok.getEndLoc(); @@ -902,7 +931,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, unsigned TmpReg = 0; - // Try to handle '[' 'symbol' ']' + // Try to handle '[' 'Symbol' ']' if (getLexer().is(AsmToken::Identifier)) { if (ParseRegister(TmpReg, Start, End)) { const MCExpr *Disp; @@ -911,16 +940,27 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, if (getLexer().isNot(AsmToken::RBrac)) return ErrorOperand(Parser.getTok().getLoc(), "Expected ']' token!"); + + // FIXME: We don't handle 'ImmDisp' '[' 'Symbol' ']'. + if (ImmDisp) + return ErrorOperand(Start, "Unsupported immediate displacement!"); + // Adjust the EndLoc due to the ']'. End = SMLoc::getFromPointer(Parser.getTok().getEndLoc().getPointer()-1); Parser.Lex(); - return X86Operand::CreateMem(Disp, Start, End, Size); + if (!isParsingInlineAsm()) + return X86Operand::CreateMem(Disp, Start, End, Size); + + // We want the size directive before the '['. + SMLoc SizeDirLoc = SMLoc::getFromPointer(Start.getPointer()-1); + return CreateMemForInlineAsm(Disp, Start, End, SizeDirLoc, Size); } } - // Parse [ BaseReg + Scale*IndexReg + Disp ]. + // Parse [ BaseReg + Scale*IndexReg + Disp ]. We may have already parsed an + // immediate displacement before the bracketed expression. bool Done = false; - IntelBracExprStateMachine SM(Parser); + IntelBracExprStateMachine SM(Parser, ImmDisp); // If we parsed a register, then the end loc has already been set and // the identifier has already been lexed. We also need to update the @@ -1007,7 +1047,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, } /// ParseIntelMemOperand - Parse intel style memory operand. -X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { +X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, + uint64_t ImmDisp, + SMLoc Start) { const AsmToken &Tok = Parser.getTok(); SMLoc End; @@ -1019,8 +1061,21 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { Parser.Lex(); } + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. + if (getLexer().is(AsmToken::Integer)) { + const AsmToken &IntTok = Parser.getTok(); + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, + IntTok.getLoc())); + uint64_t ImmDisp = IntTok.getIntVal(); + Parser.Lex(); // Eat the integer. + if (getLexer().isNot(AsmToken::LBrac)) + return ErrorOperand(Start, "Expected '[' token!"); + return ParseIntelBracExpression(SegReg, ImmDisp, Size); + } + if (getLexer().is(AsmToken::LBrac)) - return ParseIntelBracExpression(SegReg, Size); + return ParseIntelBracExpression(SegReg, ImmDisp, Size); if (!ParseRegister(SegReg, Start, End)) { // Handel SegReg : [ ... ] @@ -1029,47 +1084,16 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) { Parser.Lex(); // Eat : if (getLexer().isNot(AsmToken::LBrac)) return ErrorOperand(Start, "Expected '[' token!"); - return ParseIntelBracExpression(SegReg, Size); + return ParseIntelBracExpression(SegReg, ImmDisp, Size); } const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getParser().parseExpression(Disp, End)) return 0; - bool NeedSizeDir = false; - bool IsVarDecl = false; - if (isParsingInlineAsm()) { - if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) { - const MCSymbol &Sym = SymRef->getSymbol(); - // FIXME: The SemaLookup will fail if the name is anything other then an - // identifier. - // FIXME: Pass a valid SMLoc. - unsigned tLength, tSize, tType; - SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, tLength, - tSize, tType, IsVarDecl); - if (!Size) - Size = tType * 8; // Size is in terms of bits in this context. - NeedSizeDir = Size > 0; - } - } if (!isParsingInlineAsm()) return X86Operand::CreateMem(Disp, Start, End, Size); - else { - // If this is not a VarDecl then assume it is a FuncDecl or some other label - // reference. We need an 'r' constraint here, so we need to create register - // operand to ensure proper matching. Just pick a GPR based on the size of - // a pointer. - if (!IsVarDecl) { - unsigned RegNo = is64BitMode() ? X86::RBX : X86::EBX; - return X86Operand::CreateReg(RegNo, Start, End, /*AddressOf=*/true); - } - - // When parsing inline assembly we set the base register to a non-zero value - // as we don't know the actual value at this time. This is necessary to - // get the matching correct in some cases. - return X86Operand::CreateMem(/*SegReg*/0, Disp, /*BaseReg*/1, /*IndexReg*/0, - /*Scale*/1, Start, End, Size, NeedSizeDir); - } + return CreateMemForInlineAsm(Disp, Start, End, Start, Size); } /// Parse the '.' operator. @@ -1197,7 +1221,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(SMLoc Start, unsigned OpKind) { InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext()); - return X86Operand::CreateImm(Imm, Start, End, /*NeedAsmRewrite*/false); + return X86Operand::CreateImm(Imm, Start, End); } X86Operand *X86AsmParser::ParseIntelOperand() { @@ -1220,8 +1244,24 @@ X86Operand *X86AsmParser::ParseIntelOperand() { if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) || getLexer().is(AsmToken::Minus)) { const MCExpr *Val; + bool isInteger = getLexer().is(AsmToken::Integer); if (!getParser().parseExpression(Val, End)) { - return X86Operand::CreateImm(Val, Start, End); + if (isParsingInlineAsm()) + InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); + // Immediate. + if (getLexer().isNot(AsmToken::LBrac)) + return X86Operand::CreateImm(Val, Start, End); + + // Only positive immediates are valid. + if (!isInteger) { + Error(Parser.getTok().getLoc(), "expected a positive immediate " + "displacement before bracketed expr."); + return 0; + } + + // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ]. + if (uint64_t ImmDisp = dyn_cast<MCConstantExpr>(Val)->getValue()) + return ParseIntelMemOperand(/*SegReg=*/0, ImmDisp, Start); } } @@ -1234,11 +1274,11 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return X86Operand::CreateReg(RegNo, Start, End); getParser().Lex(); // Eat the colon. - return ParseIntelMemOperand(RegNo, Start); + return ParseIntelMemOperand(/*SegReg=*/RegNo, /*Disp=*/0, Start); } // Memory operand. - return ParseIntelMemOperand(0, Start); + return ParseIntelMemOperand(/*SegReg=*/0, /*Disp=*/0, Start); } X86Operand *X86AsmParser::ParseATTOperand() { @@ -1262,7 +1302,6 @@ X86Operand *X86AsmParser::ParseATTOperand() { if (getLexer().isNot(AsmToken::Colon)) return X86Operand::CreateReg(RegNo, Start, End); - getParser().Lex(); // Eat the colon. return ParseMemOperand(RegNo, Start); } @@ -1734,242 +1773,74 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { - switch (Inst.getOpcode()) { - default: return false; - case X86::AND16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::AND64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::AND64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::XOR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::XOR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::OR64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::OR64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::CMP64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, + bool isCmp) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + if (!isCmp) + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(MCOperand::CreateReg(Reg)); + TmpInst.addOperand(Inst.getOperand(0)); + Inst = TmpInst; + return true; +} - MCInst TmpInst; - TmpInst.setOpcode(X86::CMP64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti16i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::AX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::ADD64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti32i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::ADD64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB16i16: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB16ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::AX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB32i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; +static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, + bool isCmp = false) { + if (!Inst.getOperand(0).isImm() || + !isImmSExti64i8Value(Inst.getOperand(0).getImm())) + return false; - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB32ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::EAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } - case X86::SUB64i32: { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; + return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); +} - MCInst TmpInst; - TmpInst.setOpcode(X86::SUB64ri8); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(MCOperand::CreateReg(X86::RAX)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; - } +bool X86AsmParser:: +processInstruction(MCInst &Inst, + const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { + switch (Inst.getOpcode()) { + default: return false; + case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); + case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); + case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); + case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); + case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); + case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); + case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); + case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); + case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); + case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true); + case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); + case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); + case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); + case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); + case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); + case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); + case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); + case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); + case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); + case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); + case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); + case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); + case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); + case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); } } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index 85d8a991dd..e40edba6d6 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -61,7 +61,7 @@ static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint8_t opcode) { const struct ContextDecision* decision = 0; - + switch (type) { case ONEBYTE: decision = &ONEBYTE_SYM; @@ -102,7 +102,7 @@ static InstrUID decode(OpcodeType type, uint8_t opcode, uint8_t modRM) { const struct ModRMDecision* dec = 0; - + switch (type) { case ONEBYTE: dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; @@ -123,7 +123,7 @@ static InstrUID decode(OpcodeType type, dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; } - + switch (dec->modrm_type) { default: debug("Corrupt table! Unknown modrm_type"); @@ -171,10 +171,10 @@ static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { */ static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); - + if (!ret) ++(insn->readerCursor); - + return ret; } @@ -238,19 +238,19 @@ CONSUME_FUNC(consumeUInt64, uint64_t) */ static void dbgprintf(struct InternalInstruction* insn, const char* format, - ...) { + ...) { char buffer[256]; va_list ap; - + if (!insn->dlog) return; - + va_start(ap, format); (void)vsnprintf(buffer, sizeof(buffer), format, ap); va_end(ap); - + insn->dlog(insn->dlogArg, buffer); - + return; } @@ -305,27 +305,40 @@ static int readPrefixes(struct InternalInstruction* insn) { BOOL prefixGroups[4] = { FALSE }; uint64_t prefixLocation; uint8_t byte = 0; - + BOOL hasAdSize = FALSE; BOOL hasOpSize = FALSE; - + dbgprintf(insn, "readPrefixes()"); - + while (isPrefix) { prefixLocation = insn->readerCursor; - + if (consumeByte(insn, &byte)) return -1; /* - * If the first byte is a LOCK prefix break and let it be disassembled - * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. - * FIXME there is currently no way to get the disassembler to print the - * lock prefix if it is not the first byte. + * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then + * break and let it be disassembled as a normal "instruction". */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) - break; - + if (insn->readerCursor - 1 == insn->startLocation + && (byte == 0xf0 || byte == 0xf2 || byte == 0xf3)) { + uint8_t nextByte; + if (byte == 0xf0) + break; + if (lookAtByte(insn, &nextByte)) + return -1; + if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { + if (consumeByte(insn, &nextByte)) + return -1; + if (lookAtByte(insn, &nextByte)) + return -1; + unconsumeByte(insn); + } + if (nextByte != 0x0f && nextByte != 0x90) + break; + } + switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ @@ -387,21 +400,21 @@ static int readPrefixes(struct InternalInstruction* insn) { isPrefix = FALSE; break; } - + if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } - + insn->vexSize = 0; - + if (byte == 0xc4) { uint8_t byte1; - + if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } - + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; @@ -410,67 +423,67 @@ static int readPrefixes(struct InternalInstruction* insn) { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } - + if (insn->vexSize == 3) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ - + if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 + insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vexPrefix[2]) << 3) | (rFromVEX2of3(insn->vexPrefix[1]) << 2) | (xFromVEX2of3(insn->vexPrefix[1]) << 1) | (bFromVEX2of3(insn->vexPrefix[1]) << 0); } - + switch (ppFromVEX3of3(insn->vexPrefix[2])) { default: break; case VEX_PREFIX_66: - hasOpSize = TRUE; + hasOpSize = TRUE; break; } - + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); } } else if (byte == 0xc5) { uint8_t byte1; - + if (lookAtByte(insn, &byte1)) { dbgprintf(insn, "Couldn't read second byte of VEX"); return -1; } - + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { unconsumeByte(insn); } - + if (insn->vexSize == 2) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); - + if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 + insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vexPrefix[1]) << 2); } - + switch (ppFromVEX2of2(insn->vexPrefix[1])) { default: break; case VEX_PREFIX_66: - hasOpSize = TRUE; + hasOpSize = TRUE; break; } - + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); } } @@ -478,17 +491,17 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->mode == MODE_64BIT) { if ((byte & 0xf0) == 0x40) { uint8_t opcodeByte; - + if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { dbgprintf(insn, "Redundant REX prefix"); return -1; } - + insn->rexPrefix = byte; insn->necessaryPrefixLocation = insn->readerCursor - 2; - + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -526,7 +539,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->immediateSize = (hasOpSize ? 2 : 4); } } - + return 0; } @@ -537,22 +550,22 @@ static int readPrefixes(struct InternalInstruction* insn) { * @param insn - The instruction whose opcode is to be read. * @return - 0 if the opcode could be read successfully; nonzero otherwise. */ -static int readOpcode(struct InternalInstruction* insn) { +static int readOpcode(struct InternalInstruction* insn) { /* Determine the length of the primary opcode */ - + uint8_t current; - + dbgprintf(insn, "readOpcode()"); - + insn->opcodeType = ONEBYTE; - + if (insn->vexSize == 3) { switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); - return -1; + return -1; case 0: break; case VEX_LOB_0F: @@ -564,7 +577,7 @@ static int readOpcode(struct InternalInstruction* insn) { insn->threeByteEscape = 0x38; insn->opcodeType = THREEBYTE_38; return consumeByte(insn, &insn->opcode); - case VEX_LOB_0F3A: + case VEX_LOB_0F3A: insn->twoByteEscape = 0x0f; insn->threeByteEscape = 0x3a; insn->opcodeType = THREEBYTE_3A; @@ -577,68 +590,68 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = TWOBYTE; return consumeByte(insn, &insn->opcode); } - + if (consumeByte(insn, ¤t)) return -1; - + if (current == 0x0f) { dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); - + insn->twoByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + if (current == 0x38) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_38; } else if (current == 0x3a) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_3A; } else if (current == 0xa6) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_A6; } else if (current == 0xa7) { dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - + insn->threeByteEscape = current; - + if (consumeByte(insn, ¤t)) return -1; - + insn->opcodeType = THREEBYTE_A7; } else { dbgprintf(insn, "Didn't find a three-byte escape prefix"); - + insn->opcodeType = TWOBYTE; } } - + /* * At this point we have consumed the full opcode. * Anything we consume from here on must be unconsumed. */ - + insn->opcode = current; - + return 0; } @@ -660,19 +673,19 @@ static int getIDWithAttrMask(uint16_t* instructionID, struct InternalInstruction* insn, uint8_t attrMask) { BOOL hasModRMExtension; - + uint8_t instructionClass; instructionClass = contextForAttrs(attrMask); - + hasModRMExtension = modRMRequired(insn->opcodeType, instructionClass, insn->opcode); - + if (hasModRMExtension) { if (readModRM(insn)) return -1; - + *instructionID = decode(insn->opcodeType, instructionClass, insn->opcode, @@ -683,7 +696,7 @@ static int getIDWithAttrMask(uint16_t* instructionID, insn->opcode, 0); } - + return 0; } @@ -696,7 +709,7 @@ static int getIDWithAttrMask(uint16_t* instructionID, */ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { off_t i; - + for (i = 0;; i++) { if (orig[i] == '\0' && equiv[i] == '\0') return TRUE; @@ -715,8 +728,8 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { } /* - * getID - Determines the ID of an instruction, consuming the ModR/M byte as - * appropriate for extended and escape opcodes. Determines the attributes and + * getID - Determines the ID of an instruction, consuming the ModR/M byte as + * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. * * @param insn - The instruction whose ID is to be determined. @@ -726,21 +739,21 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) { static int getID(struct InternalInstruction* insn, const void *miiArg) { uint8_t attrMask; uint16_t instructionID; - + dbgprintf(insn, "getID()"); - + attrMask = ATTR_NONE; if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; - + if (insn->vexSize) { attrMask |= ATTR_VEX; if (insn->vexSize == 3) { switch (ppFromVEX3of3(insn->vexPrefix[2])) { case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; + attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; @@ -749,14 +762,14 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { attrMask |= ATTR_XD; break; } - + if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } else if (insn->vexSize == 2) { switch (ppFromVEX2of2(insn->vexPrefix[1])) { case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; + attrMask |= ATTR_OPSIZE; break; case VEX_PREFIX_F3: attrMask |= ATTR_XS; @@ -765,7 +778,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { attrMask |= ATTR_XD; break; } - + if (lFromVEX2of2(insn->vexPrefix[1])) attrMask |= ATTR_VEXL; } @@ -836,26 +849,26 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { * conservative, but in the specific case where OpSize is present but not * in the right place we check if there's a 16-bit operation. */ - + const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; const char *specName, *specWithOpSizeName; - + spec = specifierForUID(instructionID); - + if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { - /* + /* * ModRM required with OpSize but not present; give up and return version * without OpSize set */ - + insn->instructionID = instructionID; insn->spec = spec; return 0; } - + specName = x86DisassemblerGetInstrName(instructionID, miiArg); specWithOpSizeName = x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); @@ -882,10 +895,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { const struct InstructionSpecifier *specWithNewOpcode; spec = specifierForUID(instructionID); - + /* Borrow opcode from one of the other XCHGar opcodes */ insn->opcode = 0x91; - + if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) { @@ -906,10 +919,10 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { return 0; } - + insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); - + return 0; } @@ -924,14 +937,14 @@ static int readSIB(struct InternalInstruction* insn) { SIBIndex sibIndexBase = 0; SIBBase sibBaseBase = 0; uint8_t index, base; - + dbgprintf(insn, "readSIB()"); - + if (insn->consumedSIB) return 0; - + insn->consumedSIB = TRUE; - + switch (insn->addressSize) { case 2: dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); @@ -949,9 +962,9 @@ static int readSIB(struct InternalInstruction* insn) { if (consumeByte(insn, &insn->sib)) return -1; - + index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); - + switch (index) { case 0x4: insn->sibIndex = SIB_INDEX_NONE; @@ -963,7 +976,7 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibIndex = SIB_INDEX_NONE; break; } - + switch (scaleFromSIB(insn->sib)) { case 0: insn->sibScale = 1; @@ -978,9 +991,9 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibScale = 8; break; } - + base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); - + switch (base) { case 0x5: switch (modFromModRM(insn->modRM)) { @@ -990,12 +1003,12 @@ static int readSIB(struct InternalInstruction* insn) { break; case 0x1: insn->eaDisplacement = EA_DISP_8; - insn->sibBase = (insn->addressSize == 4 ? + insn->sibBase = (insn->addressSize == 4 ? SIB_BASE_EBP : SIB_BASE_RBP); break; case 0x2: insn->eaDisplacement = EA_DISP_32; - insn->sibBase = (insn->addressSize == 4 ? + insn->sibBase = (insn->addressSize == 4 ? SIB_BASE_EBP : SIB_BASE_RBP); break; case 0x3: @@ -1007,7 +1020,7 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibBase = (SIBBase)(sibBaseBase + base); break; } - + return 0; } @@ -1015,22 +1028,22 @@ static int readSIB(struct InternalInstruction* insn) { * readDisplacement - Consumes the displacement of an instruction. * * @param insn - The instruction whose displacement is to be read. - * @return - 0 if the displacement byte was successfully read; nonzero + * @return - 0 if the displacement byte was successfully read; nonzero * otherwise. */ -static int readDisplacement(struct InternalInstruction* insn) { +static int readDisplacement(struct InternalInstruction* insn) { int8_t d8; int16_t d16; int32_t d32; - + dbgprintf(insn, "readDisplacement()"); - + if (insn->consumedDisplacement) return 0; - + insn->consumedDisplacement = TRUE; insn->displacementOffset = insn->readerCursor - insn->startLocation; - + switch (insn->eaDisplacement) { case EA_DISP_NONE: insn->consumedDisplacement = FALSE; @@ -1051,7 +1064,7 @@ static int readDisplacement(struct InternalInstruction* insn) { insn->displacement = d32; break; } - + insn->consumedDisplacement = TRUE; return 0; } @@ -1063,22 +1076,22 @@ static int readDisplacement(struct InternalInstruction* insn) { * @param insn - The instruction whose addressing information is to be read. * @return - 0 if the information was successfully read; nonzero otherwise. */ -static int readModRM(struct InternalInstruction* insn) { +static int readModRM(struct InternalInstruction* insn) { uint8_t mod, rm, reg; - + dbgprintf(insn, "readModRM()"); - + if (insn->consumedModRM) return 0; - + if (consumeByte(insn, &insn->modRM)) return -1; insn->consumedModRM = TRUE; - + mod = modFromModRM(insn->modRM); rm = rmFromModRM(insn->modRM); reg = regFromModRM(insn->modRM); - + /* * This goes by insn->registerSize to pick the correct register, which messes * up if we're using (say) XMM or 8-bit register operands. That gets fixed in @@ -1098,16 +1111,16 @@ static int readModRM(struct InternalInstruction* insn) { insn->eaRegBase = EA_REG_RAX; break; } - + reg |= rFromREX(insn->rexPrefix) << 3; rm |= bFromREX(insn->rexPrefix) << 3; - + insn->reg = (Reg)(insn->regBase + reg); - + switch (insn->addressSize) { case 2: insn->eaBaseBase = EA_BASE_BX_SI; - + switch (mod) { case 0x0: if (rm == 0x6) { @@ -1142,14 +1155,14 @@ static int readModRM(struct InternalInstruction* insn) { case 4: case 8: insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); - + switch (mod) { case 0x0: insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ switch (rm) { case 0x4: case 0xc: /* in case REXW.b is set */ - insn->eaBase = (insn->addressSize == 4 ? + insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64); readSIB(insn); if (readDisplacement(insn)) @@ -1191,7 +1204,7 @@ static int readModRM(struct InternalInstruction* insn) { } break; } /* switch (insn->addressSize) */ - + return 0; } @@ -1274,12 +1287,12 @@ GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) * @return - 0 if fixup was successful; -1 if the register returned was * invalid for its class. */ -static int fixupReg(struct InternalInstruction *insn, +static int fixupReg(struct InternalInstruction *insn, const struct OperandSpecifier *op) { uint8_t valid; - + dbgprintf(insn, "fixupReg()"); - + switch ((OperandEncoding)op->encoding) { default: debug("Expected a REG or R/M encoding in fixupReg"); @@ -1311,12 +1324,12 @@ static int fixupReg(struct InternalInstruction *insn, } break; } - + return 0; } /* - * readOpcodeModifier - Reads an operand from the opcode field of an + * readOpcodeModifier - Reads an operand from the opcode field of an * instruction. Handles AddRegFrm instructions. * * @param insn - The instruction whose opcode field is to be read. @@ -1326,12 +1339,12 @@ static int fixupReg(struct InternalInstruction *insn, */ static int readOpcodeModifier(struct InternalInstruction* insn) { dbgprintf(insn, "readOpcodeModifier()"); - + if (insn->consumedOpcodeModifier) return 0; - + insn->consumedOpcodeModifier = TRUE; - + switch (insn->spec->modifierType) { default: debug("Unknown modifier type."); @@ -1345,11 +1358,11 @@ static int readOpcodeModifier(struct InternalInstruction* insn) { case MODIFIER_MODRM: insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; return 0; - } + } } /* - * readOpcodeRegister - Reads an operand from the opcode field of an + * readOpcodeRegister - Reads an operand from the opcode field of an * instruction and interprets it appropriately given the operand width. * Handles AddRegFrm instructions. * @@ -1364,39 +1377,39 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { if (readOpcodeModifier(insn)) return -1; - + if (size == 0) size = insn->registerSize; - + switch (size) { case 1: - insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) + insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); - if (insn->rexPrefix && + if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && insn->opcodeRegister < MODRM_REG_AL + 0x8) { insn->opcodeRegister = (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); } - + break; case 2: insn->opcodeRegister = (Reg)(MODRM_REG_AX - + ((bFromREX(insn->rexPrefix) << 3) + + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); break; case 4: insn->opcodeRegister = (Reg)(MODRM_REG_EAX - + ((bFromREX(insn->rexPrefix) << 3) + + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); break; case 8: - insn->opcodeRegister = (Reg)(MODRM_REG_RAX - + ((bFromREX(insn->rexPrefix) << 3) + insn->opcodeRegister = (Reg)(MODRM_REG_RAX + + ((bFromREX(insn->rexPrefix) << 3) | insn->opcodeModifier)); break; } - + return 0; } @@ -1414,20 +1427,20 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { uint16_t imm16; uint32_t imm32; uint64_t imm64; - + dbgprintf(insn, "readImmediate()"); - + if (insn->numImmediatesConsumed == 2) { debug("Already consumed two immediates"); return -1; } - + if (size == 0) size = insn->immediateSize; else insn->immediateSize = size; insn->immediateOffset = insn->readerCursor - insn->startLocation; - + switch (size) { case 1: if (consumeByte(insn, &imm8)) @@ -1450,9 +1463,9 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { insn->immediates[insn->numImmediatesConsumed] = imm64; break; } - + insn->numImmediatesConsumed++; - + return 0; } @@ -1465,7 +1478,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { */ static int readVVVV(struct InternalInstruction* insn) { dbgprintf(insn, "readVVVV()"); - + if (insn->vexSize == 3) insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); else if (insn->vexSize == 2) @@ -1490,14 +1503,14 @@ static int readOperands(struct InternalInstruction* insn) { int index; int hasVVVV, needVVVV; int sawRegImm = 0; - + dbgprintf(insn, "readOperands()"); /* If non-zero vvvv specified, need to make sure one of the operands uses it. */ hasVVVV = !readVVVV(insn); needVVVV = hasVVVV && (insn->vvvv != 0); - + for (index = 0; index < X86_MAX_OPERANDS; ++index) { switch (x86OperandSets[insn->spec->operands][index].encoding) { case ENCODING_NONE: @@ -1599,7 +1612,7 @@ static int readOperands(struct InternalInstruction* insn) { /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ if (needVVVV) return -1; - + return 0; } @@ -1607,7 +1620,7 @@ static int readOperands(struct InternalInstruction* insn) { * decodeInstruction - Reads and interprets a full instruction provided by the * user. * - * @param insn - A pointer to the instruction to be populated. Must be + * @param insn - A pointer to the instruction to be populated. Must be * pre-allocated. * @param reader - The function to be used to read the instruction's bytes. * @param readerArg - A generic argument to be passed to the reader to store @@ -1632,7 +1645,7 @@ int decodeInstruction(struct InternalInstruction* insn, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); - + insn->reader = reader; insn->readerArg = readerArg; insn->dlog = logger; @@ -1641,7 +1654,7 @@ int decodeInstruction(struct InternalInstruction* insn, insn->readerCursor = startLoc; insn->mode = mode; insn->numImmediatesConsumed = 0; - + if (readPrefixes(insn) || readOpcode(insn) || getID(insn, miiArg) || @@ -1650,14 +1663,14 @@ int decodeInstruction(struct InternalInstruction* insn, return -1; insn->operands = &x86OperandSets[insn->spec->operands][0]; - + insn->length = insn->readerCursor - insn->startLocation; - + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", startLoc, insn->readerCursor, insn->length); - + if (insn->length > 15) dbgprintf(insn, "Instruction exceeds 15-byte limit"); - + return 0; } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 9e68388cf2..3669560070 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -276,9 +276,9 @@ namespace X86II { MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36, MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46, - MRM_D4 = 47, MRM_D5 = 48, MRM_D8 = 49, MRM_D9 = 50, - MRM_DA = 51, MRM_DB = 52, MRM_DC = 53, MRM_DD = 54, - MRM_DE = 55, MRM_DF = 56, + MRM_D4 = 47, MRM_D5 = 48, MRM_D6 = 49, MRM_D8 = 50, + MRM_D9 = 51, MRM_DA = 52, MRM_DB = 53, MRM_DC = 54, + MRM_DD = 55, MRM_DE = 56, MRM_DF = 57, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -574,16 +574,13 @@ namespace X86II { ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV). return FirstMemOp; } - case X86II::MRM_C1: case X86II::MRM_C2: - case X86II::MRM_C3: case X86II::MRM_C4: - case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_E8: case X86II::MRM_F0: - case X86II::MRM_F8: case X86II::MRM_F9: - case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: - case X86II::MRM_D8: case X86II::MRM_D9: - case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: + case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: + case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: + case X86II::MRM_F9: case X86II::MRM_D0: case X86II::MRM_D1: + case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: + case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: + case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: return -1; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204ae75..776cee1e35 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // dst(ModR/M), src1(ModR/M) // dst(ModR/M), src1(ModR/M), imm8 // + // FMA4: + // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM) + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; CurOp++; if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -666,9 +674,15 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1038,9 +1052,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + ++SrcRegNum; + EmitRegModRMByte(MI.getOperand(CurOp), - GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); - CurOp += 2; + GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS); + CurOp = SrcRegNum + 1; break; case X86II::MRMDestMem: @@ -1117,16 +1136,13 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, TSFlags, CurByte, OS, Fixups); CurOp += X86::AddrNumOperands; break; - case X86II::MRM_C1: case X86II::MRM_C2: - case X86II::MRM_C3: case X86II::MRM_C4: - case X86II::MRM_C8: case X86II::MRM_C9: - case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: - case X86II::MRM_D8: case X86II::MRM_D9: - case X86II::MRM_DA: case X86II::MRM_DB: - case X86II::MRM_DC: case X86II::MRM_DD: - case X86II::MRM_DE: case X86II::MRM_DF: - case X86II::MRM_E8: case X86II::MRM_F0: + case X86II::MRM_C1: case X86II::MRM_C2: case X86II::MRM_C3: + case X86II::MRM_C4: case X86II::MRM_C8: case X86II::MRM_C9: + case X86II::MRM_D0: case X86II::MRM_D1: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D8: + case X86II::MRM_D9: case X86II::MRM_DA: case X86II::MRM_DB: + case X86II::MRM_DC: case X86II::MRM_DD: case X86II::MRM_DE: + case X86II::MRM_DF: case X86II::MRM_E8: case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: EmitByte(BaseOpcode, CurByte, OS); @@ -1143,6 +1159,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM_D1: MRM = 0xD1; break; case X86II::MRM_D4: MRM = 0xD4; break; case X86II::MRM_D5: MRM = 0xD5; break; + case X86II::MRM_D6: MRM = 0xD6; break; case X86II::MRM_D8: MRM = 0xD8; break; case X86II::MRM_D9: MRM = 0xD9; break; case X86II::MRM_DA: MRM = 0xDA; break; diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 0216252c19..1dcc344e7f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -120,8 +120,14 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true", "Support BMI2 instructions">; def FeatureRTM : SubtargetFeature<"rtm", "HasRTM", "true", "Support RTM instructions">; +def FeatureHLE : SubtargetFeature<"hle", "HasHLE", "true", + "Support HLE">; def FeatureADX : SubtargetFeature<"adx", "HasADX", "true", "Support ADX instructions">; +def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", + "Support PRFCHW instructions">; +def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", + "Support RDSEED instruction">; def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", @@ -130,6 +136,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", + "CallRegIndirect", "true", + "Call register indirect">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -143,9 +152,6 @@ def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom", class Proc<string Name, list<SubtargetFeature> Features> : ProcessorModel<Name, GenericModel, Features>; -class AtomProc<string Name, list<SubtargetFeature> Features> - : ProcessorModel<Name, AtomModel, Features>; - def : Proc<"generic", []>; def : Proc<"i386", []>; def : Proc<"i486", []>; @@ -162,46 +168,61 @@ def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>; -def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B, - FeatureSlowBTMem]>; -def : AtomProc<"atom", [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, - FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, FeaturePadShortFunctions]>; +// Intel Core Duo. +def : ProcessorModel<"yonah", SandyBridgeModel, + [FeatureSSE3, FeatureSlowBTMem]>; + +// NetBurst. +def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; +def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; + +// Intel Core 2 Solo/Duo. +def : ProcessorModel<"core2", SandyBridgeModel, + [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; +def : ProcessorModel<"penryn", SandyBridgeModel, + [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; + +// Atom. +def : ProcessorModel<"atom", AtomModel, + [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, + FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, + FeatureSlowDivide, + FeatureCallRegIndirect, + FeaturePadShortFunctions]>; + // "Arrandale" along with corei3 and corei5 -def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES]>; -def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT]>; +def : ProcessorModel<"corei7", SandyBridgeModel, + [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFastUAMem, FeaturePOPCNT, FeatureAES]>; + +def : ProcessorModel<"nehalem", SandyBridgeModel, + [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFastUAMem, FeaturePOPCNT]>; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge -def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; +def : ProcessorModel<"westmere", SandyBridgeModel, + [FeatureSSE42, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureFastUAMem, FeaturePOPCNT, FeatureAES, + FeaturePCLMUL]>; // Sandy Bridge // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. -def : Proc<"corei7-avx", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; +def : ProcessorModel<"corei7-avx", SandyBridgeModel, + [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>; // Ivy Bridge -def : Proc<"core-avx-i", [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, - FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>; +def : ProcessorModel<"core-avx-i", SandyBridgeModel, + [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, + FeatureF16C, FeatureFSGSBase]>; // Haswell -def : Proc<"core-avx2", [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem, - FeaturePOPCNT, FeatureAES, FeaturePCLMUL, - FeatureRDRAND, FeatureF16C, FeatureFSGSBase, - FeatureMOVBE, FeatureLZCNT, FeatureBMI, - FeatureBMI2, FeatureFMA, - FeatureRTM]>; +def : ProcessorModel<"core-avx2", HaswellModel, + [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem, + FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, + FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, + FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM, + FeatureHLE]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index ac5daec2b2..6b228b0b03 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -201,7 +201,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_TLVP_PIC_BASE: O << "@TLVP" << '-' << *MF->getPICBaseSymbol(); break; - case X86II::MO_SECREL: O << "@SECREL"; break; + case X86II::MO_SECREL: O << "@SECREL32"; break; } } diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index ece38aa346..2518e02e2a 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, const MCInstrDesc *Desc) const { bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; // VEX_R: opcode externsion equivalent to REX.R in // 1's complement (inverted) form @@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, if (HasVEX_4V) VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; CurOp++; @@ -1042,9 +1047,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, // MRMDestReg instructions forms: // dst(ModR/M), src(ModR/M) // dst(ModR/M), src(ModR/M), imm8 - if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg())) + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_B = 0x0; - if (X86II::isX86_64ExtendedReg(MI.getOperand(1).getReg())) + CurOp++; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) VEX_R = 0x0; break; case X86II::MRM0r: case X86II::MRM1r: @@ -1279,9 +1290,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case X86II::MRMDestReg: { MCE.emitByte(BaseOpcode); + + unsigned SrcRegNum = CurOp+1; + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + emitRegModRMByte(MI.getOperand(CurOp).getReg(), - getX86RegNum(MI.getOperand(CurOp+1).getReg())); - CurOp += 2; + getX86RegNum(MI.getOperand(SrcRegNum).getReg())); + CurOp = SrcRegNum + 1; break; } case X86II::MRMDestMem: { diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b5c3270065..c5da0b9b16 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -816,14 +816,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { // The x86-64 ABI for returning structs by value requires that we copy // the sret argument into %rax for the return. We saved the argument into // a virtual register in the entry block, so now we copy the value out - // and into %rax. - if (Subtarget->is64Bit() && F.hasStructRetAttr()) { + // and into %rax. We also do the same with %eax for Win32. + if (F.hasStructRetAttr() && + (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); + unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - X86::RAX).addReg(Reg); - RetRegs.push_back(X86::RAX); + RetReg).addReg(Reg); + RetRegs.push_back(RetReg); } // Now emit the RET. @@ -1526,6 +1528,9 @@ bool X86FastISel::FastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; + if (Subtarget->isTargetWindows()) + return false; + const Function *F = FuncInfo.Fn; if (F->isVarArg()) return false; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 00fbe6924c..6041669f81 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -444,7 +444,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - (N->getOpcode() == X86ISD::CALL || + // Only does this when target favors doesn't favor register indirect + // call. + ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. (Subtarget->is64Bit() || diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 960870dc60..69341869aa 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -470,7 +470,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SETCC , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); - // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intened to support + // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no // other SjLj exception interfaces are implemented and please don't build @@ -1053,23 +1053,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); - if (Subtarget->hasInt256()) { - setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SHL, MVT::v4i32, Legal); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); + setOperationAction(ISD::SRA, MVT::v4i32, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); - } setOperationAction(ISD::SDIV, MVT::v8i16, Custom); setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } @@ -1118,6 +1111,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal); @@ -1186,14 +1180,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - setOperationAction(ISD::SDIV, MVT::v8i32, Custom); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); @@ -1210,15 +1196,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + } - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); + // In the customized shift lowering, the legal cases in AVX2 will be + // recognized. + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); - } + setOperationAction(ISD::SRA, MVT::v8i32, Custom); // Custom lower several nodes for 256-bit types. for (int i = MVT::FIRST_VECTOR_VALUETYPE; @@ -1356,7 +1344,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; setPrefLoopAlignment(4); // 2^4 bytes. - BenefitFromCodePlacementOpt = true; // Predictable cmov don't hurt on atom because it's in-order. PredictableSelectIsExpensive = !Subtarget->isAtom(); @@ -1679,10 +1666,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, // The x86-64 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. + // Win32 requires us to put the sret argument to %eax as well. // We saved the argument into a virtual register in the entry block, // so now we copy the value out and into %rax/%eax. - if (Subtarget->is64Bit() && - DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { + if (DAG.getMachineFunction().getFunction()->hasStructRetAttr() && + (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); @@ -1690,12 +1678,14 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX; + unsigned RetValReg + = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? + X86::RAX : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. - RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64)); + RetOps.push_back(DAG.getRegister(RetValReg, getPointerTy())); } RetOps[0] = Chain; // Update chain. @@ -2049,9 +2039,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // The x86-64 ABIs require that for returning structs by value we copy // the sret argument into %rax/%eax (depending on ABI) for the return. + // Win32 requires us to put the sret argument to %eax as well. // Save the argument into a virtual register so that we can access it // from the return points. - if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { + if (MF.getFunction()->hasStructRetAttr() && + (Subtarget->is64Bit() || Subtarget->isTargetWindows())) { X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { @@ -7834,7 +7826,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Chain.getValue(1)); } - if (Subtarget->isTargetWindows()) { + if (Subtarget->isTargetWindows() || Subtarget->isTargetMingw()) { // Just use the implicit TLS architecture // Need to generate someting similar to: // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage @@ -7854,18 +7846,19 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or - // %gs:0x58 (64-bit). + // %gs:0x58 (64-bit). On MinGW, __tls_array is not available, so directly + // use its literal value of 0x2C. Value *Ptr = Constant::getNullValue(Subtarget->is64Bit() ? Type::getInt8PtrTy(*DAG.getContext(), 256) : Type::getInt32PtrTy(*DAG.getContext(), 257)); - SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, - Subtarget->is64Bit() - ? DAG.getIntPtrConstant(0x58) - : DAG.getExternalSymbol("_tls_array", - getPointerTy()), + SDValue TlsArray = Subtarget->is64Bit() ? DAG.getIntPtrConstant(0x58) : + (Subtarget->isTargetMingw() ? DAG.getIntPtrConstant(0x2C) : + DAG.getExternalSymbol("_tls_array", getPointerTy())); + + SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain, TlsArray, MachinePointerInfo(Ptr), false, false, false, 0); @@ -10921,16 +10914,23 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. - // RDRAND intrinsics. + // RDRAND/RDSEED intrinsics. case Intrinsic::x86_rdrand_16: case Intrinsic::x86_rdrand_32: - case Intrinsic::x86_rdrand_64: { + case Intrinsic::x86_rdrand_64: + case Intrinsic::x86_rdseed_16: + case Intrinsic::x86_rdseed_32: + case Intrinsic::x86_rdseed_64: { + unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 || + IntNo == Intrinsic::x86_rdseed_32 || + IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED : + X86ISD::RDRAND; // Emit the node with the right value type. SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other); - SDValue Result = DAG.getNode(X86ISD::RDRAND, dl, VTs, Op.getOperand(0)); + SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0)); - // If the value returned by RDRAND was valid (CF=1), return 1. Otherwise - // return the value from Rand, which is always 0, casted to i32. + // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. + // Otherwise return the value from Rand, which is always 0, casted to i32. SDValue Ops[] = { DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), DAG.getConstant(1, Op->getValueType(1)), DAG.getConstant(X86::COND_B, MVT::i32), @@ -10943,6 +10943,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } + + // XTEST intrinsics. + case Intrinsic::x86_xtest: { + SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); + SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0)); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_NE, MVT::i8), + InTrans); + SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); + return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), + Ret, SDValue(InTrans.getNode(), 1)); + } } } @@ -11490,16 +11502,13 @@ SDValue X86TargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { - +static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); - if (!Subtarget->hasSSE2()) - return SDValue(); - // Optimize shl/srl/sra with constant shift amount. if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -11610,6 +11619,224 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } } + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + unsigned RatioInLog2 = Log2_32_Ceil(Ratio); + uint64_t ShiftAmt = 0; + for (unsigned i = 0; i != Ratio; ++i) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); + } + // Check remaining shift amounts. + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Amt.getOperand(i + j)); + if (C == 0) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRLI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRAI, dl, VT, R, + DAG.getConstant(ShiftAmt, MVT::i32)); + } + } + + return SDValue(); +} + +static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, + const X86Subtarget* Subtarget) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + + if ((VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v4i32 || VT == MVT::v8i16 || + (Subtarget->hasInt256() && + ((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) || + VT == MVT::v8i32 || VT == MVT::v16i16))) { + SDValue BaseShAmt; + EVT EltVT = VT.getVectorElementType(); + + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = VT.getVectorNumElements(); + unsigned i, j; + for (i = 0; i != NumElts; ++i) { + if (Amt.getOperand(i).getOpcode() == ISD::UNDEF) + continue; + break; + } + for (j = i; j != NumElts; ++j) { + SDValue Arg = Amt.getOperand(j); + if (Arg.getOpcode() == ISD::UNDEF) continue; + if (Arg != Amt.getOperand(i)) + break; + } + if (i != NumElts && j == NumElts) + BaseShAmt = Amt.getOperand(i); + } else { + if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) + Amt = Amt.getOperand(0); + if (Amt.getOpcode() == ISD::VECTOR_SHUFFLE && + cast<ShuffleVectorSDNode>(Amt)->isSplat()) { + SDValue InVec = Amt.getOperand(0); + if (InVec.getOpcode() == ISD::BUILD_VECTOR) { + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + unsigned i = 0; + for (; i != NumElts; ++i) { + SDValue Arg = InVec.getOperand(i); + if (Arg.getOpcode() == ISD::UNDEF) continue; + BaseShAmt = Arg; + break; + } + } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { + unsigned SplatIdx = + cast<ShuffleVectorSDNode>(Amt)->getSplatIndex(); + if (C->getZExtValue() == SplatIdx) + BaseShAmt = InVec.getOperand(1); + } + } + if (BaseShAmt.getNode() == 0) + BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt, + DAG.getIntPtrConstant(0)); + } + } + + if (BaseShAmt.getNode()) { + if (EltVT.bitsGT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BaseShAmt); + else if (EltVT.bitsLT(MVT::i32)) + BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRA: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v4i32: + case MVT::v8i16: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG); + } + case ISD::SRL: + switch (VT.getSimpleVT().SimpleTy) { + default: return SDValue(); + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v4i64: + case MVT::v8i32: + case MVT::v16i16: + return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG); + } + } + } + } + + // Special case in 32-bit mode, where i64 is expanded into high and low parts. + if (!Subtarget->is64Bit() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && + Amt.getOpcode() == ISD::BITCAST && + Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + Amt = Amt.getOperand(0); + unsigned Ratio = Amt.getValueType().getVectorNumElements() / + VT.getVectorNumElements(); + std::vector<SDValue> Vals(Ratio); + for (unsigned i = 0; i != Ratio; ++i) + Vals[i] = Amt.getOperand(i); + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + for (unsigned j = 0; j != Ratio; ++j) + if (Vals[j] != Amt.getOperand(i + j)) + return SDValue(); + } + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown shift opcode!"); + case ISD::SHL: + return DAG.getNode(X86ISD::VSHL, dl, VT, R, Op.getOperand(1)); + case ISD::SRL: + return DAG.getNode(X86ISD::VSRL, dl, VT, R, Op.getOperand(1)); + case ISD::SRA: + return DAG.getNode(X86ISD::VSRA, dl, VT, R, Op.getOperand(1)); + } + } + + return SDValue(); +} + +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + SDValue R = Op.getOperand(0); + SDValue Amt = Op.getOperand(1); + SDValue V; + + if (!Subtarget->hasSSE2()) + return SDValue(); + + V = LowerScalarImmediateShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + V = LowerScalarVariableShift(Op, DAG, Subtarget); + if (V.getNode()) + return V; + + // AVX2 has VPSLLV/VPSRAV/VPSRLV. + if (Subtarget->hasInt256()) { + if (Op.getOpcode() == ISD::SRL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SHL && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v4i64 || VT == MVT::v8i32)) + return Op; + if (Op.getOpcode() == ISD::SRA && (VT == MVT::v4i32 || VT == MVT::v8i32)) + return Op; + } + // Lower SHL with variable shift amount. if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, VT)); @@ -11826,8 +12053,23 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, // fall through case MVT::v4i32: case MVT::v8i16: { - SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, - Op.getOperand(0), ShAmt, DAG); + // (sext (vzext x)) -> (vsext x) + SDValue Op0 = Op.getOperand(0); + SDValue Op00 = Op0.getOperand(0); + SDValue Tmp1; + // Hopefully, this VECTOR_SHUFFLE is just a VZEXT. + if (Op0.getOpcode() == ISD::BITCAST && + Op00.getOpcode() == ISD::VECTOR_SHUFFLE) + Tmp1 = LowerVectorIntExtend(Op00, DAG); + if (Tmp1.getNode()) { + SDValue Tmp1Op0 = Tmp1.getOperand(0); + assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT && + "This optimization is invalid without a VZEXT."); + return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0)); + } + + // If the above didn't work, then just use Shift-Left + Shift-Right. + Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT, Op0, ShAmt, DAG); return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG); } } @@ -12262,7 +12504,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UINT_TO_FP: { - if (N->getOperand(0).getValueType() != MVT::v2i32 && + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (N->getOperand(0).getValueType() != MVT::v2i32 || N->getValueType(0) != MVT::v2f32) return; SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, @@ -12545,6 +12788,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL"; case X86ISD::SAHF: return "X86ISD::SAHF"; case X86ISD::RDRAND: return "X86ISD::RDRAND"; + case X86ISD::RDSEED: return "X86ISD::RDSEED"; case X86ISD::FMADD: return "X86ISD::FMADD"; case X86ISD::FMSUB: return "X86ISD::FMSUB"; case X86ISD::FNMADD: return "X86ISD::FNMADD"; @@ -12553,6 +12797,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FMSUBADD: return "X86ISD::FMSUBADD"; case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; + case X86ISD::XTEST: return "X86ISD::XTEST"; } } @@ -15584,8 +15829,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { // Quit if the constant is neither 0 or 1. return SDValue(); - // Skip 'zext' node. - if (SetCC.getOpcode() == ISD::ZERO_EXTEND) + // Skip 'zext' or 'trunc' node. + if (SetCC.getOpcode() == ISD::ZERO_EXTEND || + SetCC.getOpcode() == ISD::TRUNCATE) SetCC = SetCC.getOperand(0); switch (SetCC.getOpcode()) { @@ -15604,9 +15850,15 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { return SDValue(); // Quit if false value is not a constant. if (!FVal) { - // A special case for rdrand, where 0 is set if false cond is found. SDValue Op = SetCC.getOperand(0); - if (Op.getOpcode() != X86ISD::RDRAND) + // Skip 'zext' or 'trunc' node. + if (Op.getOpcode() == ISD::ZERO_EXTEND || + Op.getOpcode() == ISD::TRUNCATE) + Op = Op.getOperand(0); + // A special case for rdrand/rdseed, where 0 is set if false cond is + // found. + if ((Op.getOpcode() != X86ISD::RDRAND && + Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0) return SDValue(); } // Quit if false value is not the constant 0 or 1. @@ -15918,124 +16170,12 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { - EVT VT = N->getValueType(0); if (N->getOpcode() == ISD::SHL) { SDValue V = PerformSHLCombine(N, DAG); if (V.getNode()) return V; } - // On X86 with SSE2 support, we can transform this to a vector shift if - // all elements are shifted by the same amount. We can't do this in legalize - // because the a constant vector is typically transformed to a constant pool - // so we have no knowledge of the shift amount. - if (!Subtarget->hasSSE2()) - return SDValue(); - - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && - (!Subtarget->hasInt256() || - (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) - return SDValue(); - - SDValue ShAmtOp = N->getOperand(1); - EVT EltVT = VT.getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); - SDValue BaseShAmt = SDValue(); - if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - // Handle the case where the build_vector is all undef - // FIXME: Should DAG allow this? - if (i == NumElts) - return SDValue(); - - for (; i != NumElts; ++i) { - SDValue Arg = ShAmtOp.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - if (Arg != BaseShAmt) { - return SDValue(); - } - } - } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE && - cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) { - SDValue InVec = ShAmtOp.getOperand(0); - if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = InVec.getValueType().getVectorNumElements(); - unsigned i = 0; - for (; i != NumElts; ++i) { - SDValue Arg = InVec.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) continue; - BaseShAmt = Arg; - break; - } - } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) { - unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex(); - if (C->getZExtValue() == SplatIdx) - BaseShAmt = InVec.getOperand(1); - } - } - if (BaseShAmt.getNode() == 0) { - // Don't create instructions with illegal types after legalize - // types has run. - if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) && - !DCI.isBeforeLegalize()) - return SDValue(); - - BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp, - DAG.getIntPtrConstant(0)); - } - } else - return SDValue(); - - // The shift amount is an i32. - if (EltVT.bitsGT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt); - else if (EltVT.bitsLT(MVT::i32)) - BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt); - - // The shift amount is identical so we can do a vector shift. - SDValue ValOp = N->getOperand(0); - switch (N->getOpcode()) { - default: - llvm_unreachable("Unknown shift opcode!"); - case ISD::SHL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRA: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v4i32: - case MVT::v8i16: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG); - } - case ISD::SRL: - switch (VT.getSimpleVT().SimpleTy) { - default: return SDValue(); - case MVT::v2i64: - case MVT::v4i32: - case MVT::v8i16: - case MVT::v4i64: - case MVT::v8i32: - case MVT::v16i16: - return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG); - } - } + return SDValue(); } // CMPEQCombine - Recognize the distinctive (AND (setcc ...) (setcc ..)) @@ -16346,13 +16486,19 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, // Validate that the Mask operand is a vector sra node. // FIXME: what to do for bytes, since there is a psignb/pblendvb, but // there is no psrai.b - if (Mask.getOpcode() != X86ISD::VSRAI) - return SDValue(); - - // Check that the SRA is all signbits. - SDValue SraC = Mask.getOperand(1); - unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); + unsigned SraAmt = ~0; + if (Mask.getOpcode() == ISD::SRA) { + SDValue Amt = Mask.getOperand(1); + if (isSplatVector(Amt.getNode())) { + SDValue SclrAmt = Amt->getOperand(0); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) + SraAmt = C->getZExtValue(); + } + } else if (Mask.getOpcode() == X86ISD::VSRAI) { + SDValue SraC = Mask.getOperand(1); + SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); + } if ((SraAmt + 1) != EltBits) return SDValue(); @@ -16526,11 +16672,10 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned RegSz = RegVT.getSizeInBits(); + // On Sandybridge unaligned 256bit loads are inefficient. ISD::LoadExtType Ext = Ld->getExtensionType(); unsigned Alignment = Ld->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment == MemVT.getSizeInBits()/8; - - // On Sandybridge unaligned 256bit loads are inefficient. + bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8; if (RegVT.is256BitVector() && !Subtarget->hasInt256() && !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { unsigned NumElems = RegVT.getVectorNumElements(); @@ -16550,7 +16695,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), - std::max(Alignment/2U, 1U)); + std::min(16U, Alignment)); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load1.getValue(1), Load2.getValue(1)); @@ -16721,13 +16866,13 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned Alignment = St->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment == VT.getSizeInBits()/8; // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. However, on Haswell it is better to issue a single 256-bit // memory operation. + unsigned Alignment = St->getAlignment(); + bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8; if (VT.is256BitVector() && !Subtarget->hasInt256() && StVT == VT && !IsAligned) { unsigned NumElems = VT.getVectorNumElements(); @@ -16747,7 +16892,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue Ch1 = DAG.getStore(St->getChain(), dl, Value1, Ptr1, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), - std::max(Alignment/2U, 1U)); + std::min(16U, Alignment)); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index da1dad0f40..5725f7aea5 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -356,10 +356,17 @@ namespace llvm { // RDRAND - Get a random integer and indicate whether it is valid in CF. RDRAND, + // RDSEED - Get a NIST SP800-90B & C compliant random integer and + // indicate whether it is valid in CF. + RDSEED, + // PCMP*STRI PCMPISTRI, PCMPESTRI, + // XTEST - Test if in transactional execution. + XTEST, + // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - // Atomic 64-bit binary operations. diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index bb362f5c7b..ba1aede3c1 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -84,13 +84,16 @@ defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">; defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; -def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; +def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", + [(int_x86_mmx_femms)]>; -def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), - "prefetch\t$addr", []>; +def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr), + "prefetch\t$addr", + [(prefetch addr:$addr, (i32 0), imm, (i32 1))]>; -def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), - "prefetchw\t$addr", []>; +def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", + [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))]>, TB, + Requires<[HasPrefetchW]>; // "3DNowA" instructions defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index d86a4065a7..225e9720da 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// // LEA - Load Effective Address - +let SchedRW = [WriteLEA] in { let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, (outs GR16:$dst), (ins i32mem:$src), @@ -36,41 +36,52 @@ let isReMaterializable = 1 in def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; - - +} // SchedRW //===----------------------------------------------------------------------===// // Fixed-Register Multiplication and Division Instructions. // +// SchedModel info for instruction that loads one value and gets the second +// (and possibly third) value from a register. +// This is used for instructions that put the memory operands before other +// uses. +class SchedLoadReg<SchedWrite SW> : Sched<[SW, + // Memory operand. + ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, + // Register reads (implicit or explicit). + ReadAfterLd, ReadAfterLd]>; + // Extra precision multiplication // AL is really implied by AX, but the registers in Defs must match the // SDNode results (i8, i32). +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8 - + (implicit EFLAGS)], IIC_MUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16 - + [], IIC_MUL16_REG>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), - "mul{l}\t$src", // EAX,EDX = EAX*GR32 + "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/], - IIC_MUL32_REG>; + IIC_MUL32_REG>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), - "mul{q}\t$src", // RAX,RDX = RAX*GR64 + "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/], - IIC_MUL64>; - + IIC_MUL64>, Sched<[WriteIMul]>; +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", @@ -78,51 +89,60 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8] - + (implicit EFLAGS)], IIC_MUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let mayLoad = 1, neverHasSideEffects = 1 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), "mul{w}\t$src", - [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16] - + [], IIC_MUL16_MEM>, OpSize, SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", - [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32] + [], IIC_MUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64] + "mul{q}\t$src", [], IIC_MUL64>, SchedLoadReg<WriteIMulLd>; } let neverHasSideEffects = 1 in { +// AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [], - IIC_IMUL8>; // AL,AH = AL*GR8 + IIC_IMUL8>, Sched<[WriteIMul]>; +// AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [], - IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16 + IIC_IMUL16_RR>, OpSize, Sched<[WriteIMul]>; +// EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [], - IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32 + IIC_IMUL32_RR>, Sched<[WriteIMul]>; +// RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [], - IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64 + IIC_IMUL64_RR>, Sched<[WriteIMul]>; let mayLoad = 1 in { +// AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8] + "imul{b}\t$src", [], IIC_IMUL8>, SchedLoadReg<WriteIMulLd>; +// AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize; - // AX,DX = AX*[mem16] + "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize, + SchedLoadReg<WriteIMulLd>; +// EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32] + "imul{l}\t$src", [], IIC_IMUL32_MEM>, SchedLoadReg<WriteIMulLd>; +// RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64] + "imul{q}\t$src", [], IIC_IMUL64>, SchedLoadReg<WriteIMulLd>; } } // neverHasSideEffects @@ -130,7 +150,8 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y +let isCommutable = 1, SchedRW = [WriteIMul] in { +// X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -148,9 +169,10 @@ def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>, TB; -} +} // isCommutable, SchedRW // Register-Memory Signed Integer Multiply +let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", @@ -172,12 +194,14 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (X86smul_flag GR64:$src1, (load addr:$src2)))], IIC_IMUL64_RM>, TB; +} // SchedRW } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { +let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), @@ -216,9 +240,10 @@ def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))], IIC_IMUL64_RRI>; - +} // SchedRW // Memory-Integer Signed Integer Multiply +let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -260,6 +285,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (X86smul_flag (load addr:$src1), i64immSExt8:$src2))], IIC_IMUL64_RMI>; +} // SchedRW } // Defs = [EFLAGS] @@ -267,6 +293,7 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", [], IIC_DIV8_REG>; @@ -280,24 +307,30 @@ def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src), "div{q}\t$src", [], IIC_DIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "div{b}\t$src", [], IIC_DIV8_MEM>; + "div{b}\t$src", [], IIC_DIV8_MEM>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "div{w}\t$src", [], IIC_DIV16>, OpSize; + "div{w}\t$src", [], IIC_DIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), - "div{l}\t$src", [], IIC_DIV32>; + "div{l}\t$src", [], IIC_DIV32>, + SchedLoadReg<WriteIDivLd>; // RDX:RAX/[mem64] = RAX,RDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src), - "div{q}\t$src", [], IIC_DIV64>; + "div{q}\t$src", [], IIC_DIV64>, + SchedLoadReg<WriteIDivLd>; } // Signed division/remainder. +let SchedRW = [WriteIDiv] in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", [], IIC_IDIV8>; @@ -311,20 +344,25 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", [], IIC_IDIV64>; +} // SchedRW let mayLoad = 1 in { let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH - "idiv{b}\t$src", [], IIC_IDIV8>; + "idiv{b}\t$src", [], IIC_IDIV8>, + SchedLoadReg<WriteIDivLd>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX - "idiv{w}\t$src", [], IIC_IDIV16>, OpSize; + "idiv{w}\t$src", [], IIC_IDIV16>, OpSize, + SchedLoadReg<WriteIDivLd>; let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), - "idiv{l}\t$src", [], IIC_IDIV32>; + "idiv{l}\t$src", [], IIC_IDIV32>, + SchedLoadReg<WriteIDivLd>; let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), - "idiv{q}\t$src", [], IIC_IDIV64>; + "idiv{q}\t$src", [], IIC_IDIV64>, + SchedLoadReg<WriteIDivLd>; } } // hasSideEffects = 0 @@ -335,7 +373,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), "neg{b}\t$dst", [(set GR8:$dst, (ineg GR8:$src1)), @@ -351,8 +389,10 @@ def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src1)), (implicit EFLAGS)], IIC_UNARY_REG>; -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +// Read-modify-write negate. +let SchedRW = [WriteALULd, WriteRMW] in { def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), @@ -368,12 +408,13 @@ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; +} // SchedRW } // Defs = [EFLAGS] // Note: NOT does not set EFLAGS! -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { // Match xor -1 to not. Favors these over a move imm + xor to save code size. let AddedComplexity = 15 in { def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -388,8 +429,9 @@ def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>; } -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; @@ -402,11 +444,12 @@ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>; +} // SchedRW } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "inc{b}\t$dst", @@ -454,9 +497,9 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1), Requires<[In64BitMode]>; } // isConvertibleToThreeAddress = 1, CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -491,9 +534,9 @@ def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>, Requires<[In64BitMode]>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in { let CodeSize = 2 in def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "dec{b}\t$dst", @@ -514,10 +557,10 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))], IIC_UNARY_REG>; } // CodeSize = 2 -} // Constraints = "$src1 = $dst" +} // Constraints = "$src1 = $dst", SchedRW -let CodeSize = 2 in { +let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW] in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; @@ -532,7 +575,7 @@ let CodeSize = 2 in { def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", [(store (add (loadi64 addr:$dst), -1), addr:$dst), (implicit EFLAGS)], IIC_UNARY_MEM>; -} // CodeSize = 2 +} // CodeSize = 2, SchedRW } // Defs = [EFLAGS] @@ -646,7 +689,8 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f = MRMDestReg> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>, + Sched<[WriteALU]>; // BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has // just a regclass (no eflags) as a result. @@ -689,7 +733,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs typeinfo.RegClass:$dst), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -699,7 +744,8 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo> : ITy<opcode, MRMSrcReg, typeinfo, (outs), (ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM>, + Sched<[WriteALU]> { // The disassembler should know about this, but not the asmparser. let isCodeGenOnly = 1; let hasSideEffects = 0; @@ -710,7 +756,8 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, dag outlist, list<dag> pattern> : ITy<opcode, MRMSrcMem, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>; + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALULd, ReadAfterLd]>; // BinOpRM_R - Instructions like "add reg, reg, [mem]". class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -746,7 +793,8 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; } @@ -783,7 +831,8 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f, dag outlist, list<dag> pattern> : ITy<opcode, f, typeinfo, outlist, (ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2), - mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> { + mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>, + Sched<[WriteALU]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -821,7 +870,8 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, list<dag> pattern> : ITy<opcode, MRMDestMem, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>; + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]>; // BinOpMR_RMW - Instructions like "add [mem], reg". class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, @@ -849,7 +899,8 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern, bits<8> opcode = 0x80> : ITy<opcode, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = typeinfo.ImmEncoding; } @@ -881,7 +932,8 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo, Format f, list<dag> pattern> : ITy<0x82, f, typeinfo, (outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src), - mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> { + mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>, + Sched<[WriteALULd, WriteRMW]> { let ImmT = Imm8; // Always 8-bit immediate. } @@ -913,7 +965,7 @@ class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands> : ITy<opcode, RawFrm, typeinfo, (outs), (ins typeinfo.ImmOperand:$src), - mnemonic, operands, []> { + mnemonic, operands, []>, Sched<[WriteALU]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; let Defs = [areg]; @@ -1199,7 +1251,7 @@ let isCompare = 1, Defs = [EFLAGS] in { // register class is constrained to GR8_NOREX. let isPseudo = 1 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), - "", [], IIC_BIN_NONMEM>; + "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } //===----------------------------------------------------------------------===// @@ -1210,11 +1262,12 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop, def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, (X86and_flag (not RC:$src1), RC:$src2))], - IIC_BIN_NONMEM>; + IIC_BIN_NONMEM>, Sched<[WriteALU]>; def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, EFLAGS, - (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>; + (X86and_flag (not RC:$src1), (ld_frag addr:$src2)))], IIC_BIN_MEM>, + Sched<[WriteALULd, ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { @@ -1241,12 +1294,12 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - [], IIC_MUL8>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMul]>; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - [], IIC_MUL8>, T8XD, VEX_4V; + [], IIC_MUL8>, T8XD, VEX_4V, Sched<[WriteIMulLd]>; } } @@ -1261,6 +1314,7 @@ let Predicates = [HasBMI2] in { // ADCX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize; @@ -1268,8 +1322,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADCX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adcx{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8, OpSize, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adcx{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8, OpSize; @@ -1284,6 +1339,7 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { // ADOX Instruction // let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { + let SchedRW = [WriteALU] in { def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS; @@ -1291,8 +1347,9 @@ let hasSideEffects = 0, Predicates = [HasADX], Defs = [EFLAGS] in { def ADOX64rr : I<0xF6, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "adox{q}\t{$src, $dst|$dst, $src}", [], IIC_BIN_NONMEM>, T8XS, REX_W, Requires<[In64BitMode]>; + } // SchedRW - let mayLoad = 1 in { + let mayLoad = 1, SchedRW = [WriteALULd] in { def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "adox{l}\t{$src, $dst|$dst, $src}", [], IIC_BIN_MEM>, T8XS; diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 8f2d0a1aae..a967a4da5c 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -16,7 +16,7 @@ // SetCC instructions. multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", - isCommutable = 1 in { + isCommutable = 1, SchedRW = [WriteALU] in { def NAME#16rr : I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -37,7 +37,8 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> { IIC_CMOV32_RR>, TB; } - let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in { + let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", + SchedRW = [WriteALULd, ReadAfterLd] in { def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), !strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"), @@ -83,11 +84,11 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> { def r : I<opc, MRM0r, (outs GR8:$dst), (ins), !strconcat(Mnemonic, "\t$dst"), [(set GR8:$dst, (X86setcc OpNode, EFLAGS))], - IIC_SET_R>, TB; + IIC_SET_R>, TB, Sched<[WriteALU]>; def m : I<opc, MRM0m, (outs), (ins i8mem:$dst), !strconcat(Mnemonic, "\t$dst"), [(store (X86setcc OpNode, EFLAGS), addr:$dst)], - IIC_SET_M>, TB; + IIC_SET_M>, TB, Sched<[WriteALU, WriteStore]>; } // Uses = [EFLAGS] } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 734e5982b2..d9ff0c63c5 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -149,11 +149,12 @@ let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in { //===----------------------------------------------------------------------===// // EH Pseudo Instructions // +let SchedRW = [WriteSystem] in { let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR32:$addr)], IIC_RET>; + [(X86ehret GR32:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -161,7 +162,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in { def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), "ret\t#eh_return, addr: $addr", - [(X86ehret GR64:$addr)], IIC_RET>; + [(X86ehret GR64:$addr)], IIC_RET>, Sched<[WriteJumpLd]>; } @@ -186,6 +187,7 @@ let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, Requires<[In64BitMode]>; } } +} // SchedRW let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst), @@ -220,7 +222,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in { def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", - [(set GR8:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR8:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller // encoding and avoids a partial-register update sometimes, but doing so @@ -229,11 +231,12 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "", // to an MCInst. def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "", - [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize; + [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize, + Sched<[WriteZero]>; // FIXME: Set encoding to pseudo. def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", - [(set GR32:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; } // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a @@ -245,7 +248,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "", - [(set GR64:$dst, 0)], IIC_ALU_NONMEM>; + [(set GR64:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however @@ -254,10 +257,10 @@ let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1 in def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), "", [(set GR64:$dst, i64immZExt32:$src)], - IIC_ALU_NONMEM>; + IIC_ALU_NONMEM>, Sched<[WriteALU]>; // Use sbb to materialize carry bit. -let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1 in { +let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteALU] in { // FIXME: These are pseudo ops that should be replaced with Pat<> patterns. // However, Pat<> can't replicate the destination reg into the inputs of the // result. @@ -320,6 +323,7 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))), //===----------------------------------------------------------------------===// // String Pseudo Instructions // +let SchedRW = [WriteMicrocoded] in { let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in { def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}", [(X86rep_movs i8)], IIC_REP_MOVS>, REP, @@ -382,6 +386,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in { [(X86rep_stos i64)], IIC_REP_STOS>, REP, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // Thread Local Storage Instructions @@ -594,12 +599,13 @@ defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; let isCodeGenOnly = 1, Defs = [EFLAGS] in def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero), "or{l}\t{$zero, $dst|$dst, $zero}", - [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK; + [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK, + Sched<[WriteALULd, WriteRMW]>; let hasSideEffects = 1 in def Int_MemBarrier : I<0, Pseudo, (outs), (ins), "#MEMBARRIER", - [(X86MemBarrier)]>; + [(X86MemBarrier)]>, Sched<[WriteLoad]>; // RegOpc corresponds to the mr version of the instruction // ImmOpc corresponds to the mi version of the instruction @@ -607,7 +613,8 @@ def Int_MemBarrier : I<0, Pseudo, (outs), (ins), // ImmMod corresponds to the instruction format of the mi and mi8 versions multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8, Format ImmMod, string mnemonic> { -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4}, RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 }, @@ -694,7 +701,8 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, "xor">; // Optimized codegen when the non-memory output is not used. multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form, string mnemonic> { -let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in { +let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst), !strconcat(mnemonic, "{b}\t$dst"), @@ -728,7 +736,7 @@ let isCodeGenOnly = 1 in { multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form, string mnemonic, SDPatternOperator frag, InstrItinClass itin8, InstrItinClass itin> { -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1, SchedRW = [WriteALULd, WriteRMW] in { let Defs = [AL, EFLAGS], Uses = [AL] in def NAME#8 : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap), !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"), @@ -748,14 +756,15 @@ let isCodeGenOnly = 1 in { } } -let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in { +let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX], + SchedRW = [WriteALULd, WriteRMW] in { defm LCMPXCHG8B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg8b", X86cas8, i64mem, IIC_CMPX_LOCK_8B>; } let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX], - Predicates = [HasCmpxchg16b] in { + Predicates = [HasCmpxchg16b], SchedRW = [WriteALULd, WriteRMW] in { defm LCMPXCHG16B : LCMPXCHG_UnOp<0xC7, MRM1m, "cmpxchg16b", X86cas16, i128mem, IIC_CMPX_LOCK_16B>, REX_W; @@ -768,7 +777,8 @@ defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", multiclass ATOMIC_LOAD_BINOP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin8, InstrItinClass itin> { - let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in { + let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1, + SchedRW = [WriteALULd, WriteRMW] in { def NAME#8 : I<opc8, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), @@ -990,9 +1000,6 @@ def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), // This corresponds to add $foo@tpoff, %rax def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; -// This corresponds to mov foo@tpoff(%rbx), %eax -def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), - (MOV64rm tglobaltlsaddr :$dst)>; // Direct PC relative function call for small code model. 32-bit displacement @@ -1192,7 +1199,8 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. -let AddedComplexity = 5 in { // Try this before the selecting to OR +// Try this before the selecting to OR. +let AddedComplexity = 5, SchedRW = [WriteALU] in { let isConvertibleToThreeAddress = 1, Constraints = "$src1 = $dst", Defs = [EFLAGS] in { @@ -1239,7 +1247,7 @@ def ADD64ri32_DB : I<0, Pseudo, [(set GR64:$dst, (or_is_add GR64:$src1, i64immSExt32:$src2))]>; } -} // AddedComplexity +} // AddedComplexity, SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index bfe954114c..0e696513d4 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -20,7 +20,7 @@ // The X86retflag return instructions are variadic because we may add ST0 and // ST1 arguments when returning values on the x87 stack. let isTerminator = 1, isReturn = 1, isBarrier = 1, - hasCtrlDep = 1, FPForm = SpecialFP in { + hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in { def RET : I <0xC3, RawFrm, (outs), (ins variable_ops), "ret", [(X86retflag 0)], IIC_RET>; @@ -46,7 +46,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, } // Unconditional branches. -let isBarrier = 1, isBranch = 1, isTerminator = 1 in { +let isBarrier = 1, isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in { def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst), "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>; def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), @@ -58,7 +58,7 @@ let isBarrier = 1, isBranch = 1, isTerminator = 1 in { } // Conditional Branches. -let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in { +let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump] in { multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> { def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [], IIC_Jcc>; @@ -85,7 +85,7 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>; defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>; // jcx/jecx/jrcx instructions. -let isBranch = 1, isTerminator = 1 in { +let isBranch = 1, isTerminator = 1, SchedRW = [WriteJump] in { // These are the 32-bit versions of this instruction for the asmparser. In // 32-bit mode, the address size prefix is jcxz and the unprefixed version is // jecxz. @@ -110,36 +110,46 @@ let isBranch = 1, isTerminator = 1 in { // Indirect branches let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", - [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>; + [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>, + Sched<[WriteJump]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>; + [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, + Requires<[In32BitMode]>, Sched<[WriteJumpLd]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", - [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>; + [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>, + Sched<[WriteJump]>; def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst", - [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>; + [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, + Requires<[In64BitMode]>, Sched<[WriteJumpLd]>; def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), - "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize; + "ljmp{w}\t{$seg, $off|$off, $seg}", [], + IIC_JMP_FAR_PTR>, OpSize, Sched<[WriteJump]>; def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), - "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>; + "ljmp{l}\t{$seg, $off|$off, $seg}", [], + IIC_JMP_FAR_PTR>, Sched<[WriteJump]>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>; + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, + Sched<[WriteJump]>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), - "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize; + "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize, + Sched<[WriteJumpLd]>; def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst), - "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>; + "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>, + Sched<[WriteJumpLd]>; } // Loop instructions - +let SchedRW = [WriteJump] in { def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>; def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>; def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>; +} //===----------------------------------------------------------------------===// // Call Instructions... @@ -152,27 +162,32 @@ let isCall = 1 in let Uses = [ESP] in { def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i32imm_pcrel:$dst), - "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; + "call{l}\t$dst", [], IIC_CALL_RI>, + Requires<[In32BitMode]>, Sched<[WriteJump]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - Requires<[In32BitMode]>; + Requires<[In32BitMode]>, Sched<[WriteJump]>; def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), - "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, - Requires<[In32BitMode]>; + "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], + IIC_CALL_MEM>, + Requires<[In32BitMode,FavorMemIndirectCall]>, + Sched<[WriteJumpLd]>; def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), "lcall{w}\t{$seg, $off|$off, $seg}", [], - IIC_CALL_FAR_PTR>, OpSize; + IIC_CALL_FAR_PTR>, OpSize, Sched<[WriteJump]>; def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs), (ins i32imm:$off, i16imm:$seg), "lcall{l}\t{$seg, $off|$off, $seg}", [], - IIC_CALL_FAR_PTR>; + IIC_CALL_FAR_PTR>, Sched<[WriteJump]>; def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst), - "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize; + "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize, + Sched<[WriteJumpLd]>; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), - "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>; + "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>, + Sched<[WriteJumpLd]>; // callw for 16 bit code for the assembler. let isAsmParserOnly = 1 in @@ -185,7 +200,7 @@ let isCall = 1 in // Tail call stuff. let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in + isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in let Uses = [ESP] in { def TCRETURNdi : PseudoI<(outs), (ins i32imm_pcrel:$dst, i32imm:$offset), []>; @@ -216,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, // RSP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. Uses for argument // registers are added manually. -let isCall = 1, Uses = [RSP] in { +let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in { // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. @@ -231,7 +246,7 @@ let isCall = 1, Uses = [RSP] in { def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, - Requires<[In64BitMode]>; + Requires<[In64BitMode,FavorMemIndirectCall]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; @@ -245,13 +260,12 @@ let isCall = 1, isCodeGenOnly = 1 in def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, - Requires<[IsWin64]>; + Requires<[IsWin64]>, Sched<[WriteJump]>; } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - isCodeGenOnly = 1 in - let Uses = [RSP], - usesCustomInserter = 1 in { + isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1, + SchedRW = [WriteJump] in { def TCRETURNdi64 : PseudoI<(outs), (ins i64i32imm_pcrel:$dst, i32imm:$offset), []>; diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td index 2eb454ded2..6dc7175357 100644 --- a/lib/Target/X86/X86InstrExtension.td +++ b/lib/Target/X86/X86InstrExtension.td @@ -42,48 +42,54 @@ let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in { def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movs{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB, + Sched<[WriteALULd]>; def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movs{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; let neverHasSideEffects = 1 in { def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src), "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>, - TB, OpSize; + TB, OpSize, Sched<[WriteALULd]>; } // neverHasSideEffects = 1 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB, + Sched<[WriteALULd]>; def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src), "movz{wl|x}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // These are the same as the regular MOVZX32rr8 and MOVZX32rm8 // except that they use GR32_NOREX for the output operand register class @@ -92,12 +98,12 @@ let neverHasSideEffects = 1, isCodeGenOnly = 1 in { def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg, (outs GR32_NOREX:$dst), (ins GR8_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALU]>; let mayLoad = 1 in def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, (outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src), "movz{bl|x}\t{$src, $dst|$dst, $src}", - [], IIC_MOVZX>, TB; + [], IIC_MOVZX>, TB, Sched<[WriteALULd]>; } // MOVSX64rr8 always has a REX prefix and it has an 8-bit register @@ -106,38 +112,42 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem, // were generalized, this would require a special register class. def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "movs{bq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB; + [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB, + Sched<[WriteALU]>; def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movs{wq|x}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>, - TB; + TB, Sched<[WriteALULd]>; def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>, + Sched<[WriteALU]>; def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "movs{lq|xd}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>; + [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>, + Sched<[WriteALULd]>; // movzbq and movzwq encodings for the disassembler def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src), "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALU]>; def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src), "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALU]>; def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // FIXME: These should be Pat patterns. let isCodeGenOnly = 1 in { @@ -145,17 +155,19 @@ let isCodeGenOnly = 1 in { // Use movzbl instead of movzbq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src), - "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src), "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>, - TB; + TB, Sched<[WriteALULd]>; // Use movzwl instead of movzwq when the destination is a register; it's // equivalent due to implicit zero-extending, and it has a smaller encoding. def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src), - "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB; + "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB, + Sched<[WriteALU]>; def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), "", [(set GR64:$dst, (zextloadi64i16 addr:$src))], - IIC_MOVZX>, TB; + IIC_MOVZX>, TB, Sched<[WriteALULd]>; // There's no movzlq instruction, but movl can be used for this purpose, using // implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero @@ -165,9 +177,10 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src), // necessarily all zero. In such cases, we fall back to these explicit zext // instructions. def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src), - "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>; + "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>, + Sched<[WriteALU]>; def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src), "", [(set GR64:$dst, (zextloadi64i32 addr:$src))], - IIC_MOVZX>; + IIC_MOVZX>, Sched<[WriteALULd]>; } diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 568726e08e..2224a08d59 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -422,7 +422,7 @@ def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>; def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>; } -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def LD_F32m : FPI<0xD9, MRM0m, (outs), (ins f32mem:$src), "fld{s}\t$src", IIC_FLD>; def LD_F64m : FPI<0xDD, MRM0m, (outs), (ins f64mem:$src), "fld{l}\t$src", @@ -436,7 +436,7 @@ def ILD_F32m : FPI<0xDB, MRM0m, (outs), (ins i32mem:$src), "fild{l}\t$src", def ILD_F64m : FPI<0xDF, MRM5m, (outs), (ins i64mem:$src), "fild{ll}\t$src", IIC_FILD>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def ST_F32m : FPI<0xD9, MRM2m, (outs), (ins f32mem:$dst), "fst{s}\t$dst", IIC_FST>; def ST_F64m : FPI<0xDD, MRM2m, (outs), (ins f64mem:$dst), "fst{l}\t$dst", @@ -481,7 +481,7 @@ def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, [(X86fp_to_i64mem RFP80:$src, addr:$op)]>; } // Predicates = [HasSSE3] -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst", IIC_FST>; def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst", @@ -491,6 +491,7 @@ def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst), } // FP Stack manipulation instructions. +let SchedRW = [WriteMove] in { def LD_Frr : FPI<0xC0, AddRegFrm, (outs), (ins RST:$op), "fld\t$op", IIC_FLD>, D9; def ST_Frr : FPI<0xD0, AddRegFrm, (outs), (ins RST:$op), "fst\t$op", @@ -499,6 +500,7 @@ def ST_FPrr : FPI<0xD8, AddRegFrm, (outs), (ins RST:$op), "fstp\t$op", IIC_FST>, DD; def XCH_F : FPI<0xC8, AddRegFrm, (outs), (ins RST:$op), "fxch\t$op", IIC_FXCH>, D9; +} // Floating point constant loads. let isReMaterializable = 1 in { @@ -516,19 +518,23 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP, [(set RFP80:$dst, fpimm1)]>; } +let SchedRW = [WriteZero] in { def LD_F0 : FPI<0xEE, RawFrm, (outs), (ins), "fldz", IIC_FLDZ>, D9; def LD_F1 : FPI<0xE8, RawFrm, (outs), (ins), "fld1", IIC_FIST>, D9; - +} // Floating point compares. +let SchedRW = [WriteFAdd] in { def UCOM_Fpr32 : FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP32:$lhs, RFP32:$rhs)))]>; def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP64:$lhs, RFP64:$rhs)))]>; def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, [(set FPSW, (trunc (X86cmp RFP80:$lhs, RFP80:$rhs)))]>; +} // SchedRW } // Defs = [FPSW] +let SchedRW = [WriteFAdd] in { // CC = ST(0) cmp ST(i) let Defs = [EFLAGS, FPSW] in { def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, @@ -566,8 +572,10 @@ def COM_FIr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), def COM_FIPr : FPI<0xF0, AddRegFrm, (outs), (ins RST:$reg), "fcompi\t$reg", IIC_FCOMI>, DF; } +} // SchedRW // Floating point flag ops. +let SchedRW = [WriteALU] in { let Defs = [AX], Uses = [FPSW] in def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags (outs), (ins), "fnstsw %ax", @@ -576,23 +584,26 @@ def FNSTSW16r : I<0xE0, RawFrm, // AX = fp flags def FNSTCW16m : I<0xD9, MRM7m, // [mem16] = X87 control world (outs), (ins i16mem:$dst), "fnstcw\t$dst", [(X86fp_cwd_get16 addr:$dst)], IIC_FNSTCW>; - +} // SchedRW let mayLoad = 1 in def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16] - (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>; + (outs), (ins i16mem:$dst), "fldcw\t$dst", [], IIC_FLDCW>, + Sched<[WriteLoad]>; // FPU control instructions +let SchedRW = [WriteMicrocoded] in { let Defs = [FPSW] in def FNINIT : I<0xE3, RawFrm, (outs), (ins), "fninit", [], IIC_FNINIT>, DB; def FFREE : FPI<0xC0, AddRegFrm, (outs), (ins RST:$reg), "ffree\t$reg", IIC_FFREE>, DD; - // Clear exceptions let Defs = [FPSW] in def FNCLEX : I<0xE2, RawFrm, (outs), (ins), "fnclex", [], IIC_FNCLEX>, DB; +} // SchedRW // Operandless floating-point instructions for the disassembler. +let SchedRW = [WriteMicrocoded] in { def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", [], IIC_WAIT>; def FNOP : I<0xD0, RawFrm, (outs), (ins), "fnop", [], IIC_FNOP>, D9; @@ -627,6 +638,7 @@ def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), def FXRSTOR64 : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), "fxrstorq\t$src", [], IIC_FXRSTOR>, TB, REX_W, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Non-Instruction Patterns diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 44e574d246..0ef9491eb7 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -45,14 +45,15 @@ def MRM_D0 : Format<45>; def MRM_D1 : Format<46>; def MRM_D4 : Format<47>; def MRM_D5 : Format<48>; -def MRM_D8 : Format<49>; -def MRM_D9 : Format<50>; -def MRM_DA : Format<51>; -def MRM_DB : Format<52>; -def MRM_DC : Format<53>; -def MRM_DD : Format<54>; -def MRM_DE : Format<55>; -def MRM_DF : Format<56>; +def MRM_D6 : Format<49>; +def MRM_D8 : Format<50>; +def MRM_D9 : Format<51>; +def MRM_DA : Format<52>; +def MRM_DB : Format<53>; +def MRM_DC : Format<54>; +def MRM_DD : Format<55>; +def MRM_DE : Format<56>; +def MRM_DF : Format<57>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our @@ -208,47 +209,47 @@ class PseudoI<dag oops, dag iops, list<dag> pattern> } class I<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + list<dag> pattern, InstrItinClass itin = NoItinerary, Domain d = GenericDomain> : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT, + list<dag> pattern, InstrItinClass itin = NoItinerary, Domain d = GenericDomain> : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> { let Pattern = pattern; let CodeSize = 3; } class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; @@ -257,12 +258,12 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, // FPStack Instruction Templates: // FPI - Floating Point Instruction template. class FPI<bits<8> o, Format F, dag outs, dag ins, string asm, - InstrItinClass itin = IIC_DEFAULT> + InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, [], itin> {} // FpI_ - Floating Point Pseudo Instruction template. Not Predicated. class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern, - InstrItinClass itin = IIC_DEFAULT> + InstrItinClass itin = NoItinerary> : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> { let FPForm = fp; let Pattern = pattern; @@ -275,14 +276,14 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern, // Iseg32 - 16-bit segment selector, 32-bit offset class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm16, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; } class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm32, outs, ins, asm, itin> { let Pattern = pattern; let CodeSize = 3; @@ -292,7 +293,7 @@ def __xs : XS; // SI - SSE 1 & 2 scalar instructions class SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); @@ -303,7 +304,7 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, // SIi8 - SSE 1 & 2 scalar instructions class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin> { let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], !if(!eq(Prefix, __xs.Prefix), [UseSSE1], [UseSSE2])); @@ -350,25 +351,25 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // VPSI - SSE1 instructions with TB prefix in AVX form. class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE1]>; class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[UseSSE1]>; class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB, Requires<[UseSSE1]>; class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, Requires<[HasAVX]>; class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB, Requires<[HasAVX]>; @@ -388,42 +389,42 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMX operands. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[UseSSE2]>; class S2SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[UseSSE2]>; class S2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[UseSSE2]>; class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[UseSSE2]>; class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[UseSSE2]>; class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD, Requires<[HasAVX]>; class VS2SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS, Requires<[HasAVX]>; class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[HasAVX]>; class MMXSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>; class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>; // SSE3 Instruction Templates: @@ -433,15 +434,15 @@ class MMXS2SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // S3DI - SSE3 instructions with XD prefix. class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS, Requires<[UseSSE3]>; class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD, Requires<[UseSSE3]>; class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize, Requires<[UseSSE3]>; @@ -458,19 +459,19 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, // classes. They need to be enabled even if AVX is enabled. class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[UseSSSE3]>; class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[UseSSSE3]>; class MMXSS38I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasSSSE3]>; class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasSSSE3]>; @@ -480,11 +481,11 @@ class MMXSS3AI<bits<8> o, Format F, dag outs, dag ins, string asm, // SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8. // class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[UseSSE41]>; class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[UseSSE41]>; @@ -492,19 +493,19 @@ class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // // SS428I - SSE 4.2 instructions with T8 prefix. class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[UseSSE42]>; // SS42FI - SSE 4.2 instructions with T8XD prefix. // NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns. class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>; // SS42AI = SSE 4.2 instructions with TA prefix class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[UseSSE42]>; @@ -514,11 +515,11 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX8I - AVX instructions with T8 and OpSize prefix. // AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8. class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX]>; class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX]>; @@ -528,11 +529,11 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AVX28I - AVX2 instructions with T8 and OpSize prefix. // AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize, Requires<[HasAVX2]>; class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasAVX2]>; @@ -541,53 +542,53 @@ class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // AES8I // These use the same encoding as the SSE4.2 T8 and TA encodings. class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, Requires<[HasAES]>; class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, Requires<[HasAES]>; // PCLMUL Instruction Templates class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, Requires<[HasPCLMUL]>; class AVXPCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, Requires<[HasAVX, HasPCLMUL]>; // FMA3 Instruction Templates class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, T8, - OpSize, VEX_4V, Requires<[HasFMA]>; + OpSize, VEX_4V, FMASC, Requires<[HasFMA]>; // FMA4 Instruction Templates class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, TA, - OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>; + OpSize, VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>; // XOP 2, 3 and 4 Operand Instruction Template class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP9, Requires<[HasXOP]>; // XOP 2, 3 and 4 Operand Instruction Templates with imm byte class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XOP, XOP8, Requires<[HasXOP]>; // XOP 5 operand instruction (VEX encoding!) class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag>pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag>pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>; @@ -595,33 +596,33 @@ class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm, // class RI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W; class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W { let Pattern = pattern; let CodeSize = 3; } class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W; class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W; // MMX Instruction templates @@ -635,23 +636,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm, // MMXID - MMX instructions with XD prefix. // MMXIS - MMX instructions with XS prefix. class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>; class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>; class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>; class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>; class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>; class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, - list<dag> pattern, InstrItinClass itin = IIC_DEFAULT> + list<dag> pattern, InstrItinClass itin = NoItinerary> : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 17714acd86..7ba542c875 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3655,7 +3655,16 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Align) const { const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0; + bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect(); bool isTwoAddrFold = false; + + // Atom favors register form of call. So, we do not fold loads into calls + // when X86Subtarget is Atom. + if (isCallRegIndirect && + (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) { + return NULL; + } + unsigned NumOps = MI->getDesc().getNumOperands(); bool isTwoAddr = NumOps > 1 && MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index d989ec7bb0..ccc1aa2e35 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -142,6 +142,9 @@ def X86sahf : SDNode<"X86ISD::SAHF", SDTX86sahf>; def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; +def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand, + [SDNPHasChain, SDNPSideEffect]>; + def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -603,7 +606,12 @@ def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; def HasRTM : Predicate<"Subtarget->hasRTM()">; +def HasHLE : Predicate<"Subtarget->hasHLE()">; +def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; def HasADX : Predicate<"Subtarget->hasADX()">; +def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; +def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; +def HasPrefetchW : Predicate<"Subtarget->has3DNow() || Subtarget->hasPRFCHW()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">; @@ -626,6 +634,7 @@ def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; +def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. @@ -758,7 +767,7 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{ // // Nop -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteZero] in { def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", [], IIC_NOP>; def NOOPW : I<0x1f, MRM0m, (outs), (ins i16mem:$zero), "nop{w}\t$zero", [], IIC_NOP>, TB, OpSize; @@ -769,8 +778,9 @@ let neverHasSideEffects = 1 in { // Constructing a stack frame. def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl), - "enter\t$len, $lvl", [], IIC_ENTER>; + "enter\t$len, $lvl", [], IIC_ENTER>, Sched<[WriteMicrocoded]>; +let SchedRW = [WriteALU] in { let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, @@ -780,13 +790,14 @@ let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", [], IIC_LEAVE>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // let Defs = [ESP], Uses = [ESP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", [], IIC_POP_REG16>, OpSize; def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", [], @@ -803,9 +814,9 @@ def POP32rmm: I<0x8F, MRM0m, (outs i32mem:$dst), (ins), "pop{l}\t$dst", [], def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", [], IIC_POP_F>, OpSize; def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", [], IIC_POP_FD>, Requires<[In32BitMode]>; -} +} // mayLoad, SchedRW -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[], IIC_PUSH_REG>, OpSize; def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[], @@ -832,29 +843,30 @@ def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", [], IIC_PUSH_F>, def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", [], IIC_PUSH_F>, Requires<[In32BitMode]>; -} +} // mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in { -let mayLoad = 1 in { +let mayLoad = 1, SchedRW = [WriteLoad] in { def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", [], IIC_POP_REG>; def POP64rmm: I<0x8F, MRM0m, (outs i64mem:$dst), (ins), "pop{q}\t$dst", [], IIC_POP_MEM>; -} -let mayStore = 1 in { +} // mayLoad, SchedRW +let mayStore = 1, SchedRW = [WriteStore] in { def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>; def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>; -} +} // mayStore, SchedRW } -let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in { +let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1, + SchedRW = [WriteStore] in { def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm), "push{q}\t$imm", [], IIC_PUSH_IMM>; def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), @@ -865,23 +877,24 @@ def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm), let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", [], IIC_POP_FD>, - Requires<[In64BitMode]>; + Requires<[In64BitMode]>, Sched<[WriteLoad]>; let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", [], IIC_PUSH_F>, - Requires<[In64BitMode]>; + Requires<[In64BitMode]>, Sched<[WriteStore]>; let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], - mayLoad=1, neverHasSideEffects=1 in { + mayLoad = 1, neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l|d}", [], IIC_POP_A>, Requires<[In32BitMode]>; } let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], - mayStore=1, neverHasSideEffects=1 in { + mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l|d}", [], IIC_PUSH_A>, Requires<[In32BitMode]>; } -let Constraints = "$src = $dst" in { // GR32 = bswap GR32 +let Constraints = "$src = $dst", SchedRW = [WriteALU] in { +// GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -890,60 +903,63 @@ def BSWAP32r : I<0xC8, AddRegFrm, def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))], IIC_BSWAP>, TB; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW // Bit scan instructions. let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))], - IIC_BSF>, TB, OpSize; + IIC_BSF>, TB, OpSize, Sched<[WriteShift]>; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))], - IIC_BSF>, TB, OpSize; + IIC_BSF>, TB, OpSize, Sched<[WriteShiftLd]>; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB; + [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))], IIC_BSF>, TB, + Sched<[WriteShift]>; def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))], - IIC_BSF>, TB; + IIC_BSF>, TB, Sched<[WriteShiftLd]>; def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))], - IIC_BSF>, TB; + IIC_BSF>, TB, Sched<[WriteShift]>; def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsf{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))], - IIC_BSF>, TB; + IIC_BSF>, TB, Sched<[WriteShiftLd]>; def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))], IIC_BSR>, - TB, OpSize; + TB, OpSize, Sched<[WriteShift]>; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))], IIC_BSR>, TB, - OpSize; + OpSize, Sched<[WriteShiftLd]>; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB; + [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))], IIC_BSR>, TB, + Sched<[WriteShift]>; def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))], - IIC_BSR>, TB; + IIC_BSR>, TB, Sched<[WriteShiftLd]>; def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "bsr{q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB; + [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))], IIC_BSR>, TB, + Sched<[WriteShift]>; def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "bsr{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))], - IIC_BSR>, TB; + IIC_BSR>, TB, Sched<[WriteShiftLd]>; } // Defs = [EFLAGS] - +let SchedRW = [WriteMicrocoded] in { // These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in { def MOVSB : I<0xA4, RawFrm, (outs), (ins), "movsb", [], IIC_MOVS>; @@ -971,12 +987,12 @@ def CMPS8 : I<0xA6, RawFrm, (outs), (ins), "cmpsb", [], IIC_CMPS>; def CMPS16 : I<0xA7, RawFrm, (outs), (ins), "cmpsw", [], IIC_CMPS>, OpSize; def CMPS32 : I<0xA7, RawFrm, (outs), (ins), "cmps{l|d}", [], IIC_CMPS>; def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", [], IIC_CMPS>; - +} // SchedRW //===----------------------------------------------------------------------===// // Move Instructions. // - +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in { def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; @@ -987,6 +1003,7 @@ def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } + let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", @@ -1004,7 +1021,9 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>; } +} // SchedRW +let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; @@ -1017,9 +1036,11 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW /// moffs8, moffs16 and moffs32 versions of moves. The immediate is a /// 32-bit offset from the PC. These are only valid in x86-32 mode. +let SchedRW = [WriteALU] in { def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src), "mov{b}\t{$src, %al|AL, $src}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; @@ -1038,6 +1059,7 @@ def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins), def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins), "mov{l}\t{%eax, $dst|$dst, EAX}", [], IIC_MOV_MEM>, Requires<[In32BitMode]>; +} // FIXME: These definitions are utterly broken // Just leave them commented out for now because they're useless outside @@ -1055,7 +1077,7 @@ def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins), */ -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), @@ -1066,7 +1088,7 @@ def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; } -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(set GR8:$dst, (loadi8 addr:$src))], IIC_MOV_MEM>; @@ -1081,6 +1103,7 @@ def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), [(set GR64:$dst, (load addr:$src))], IIC_MOV_MEM>; } +let SchedRW = [WriteStore] in { def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "mov{b}\t{$src, $dst|$dst, $src}", [(store GR8:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1093,6 +1116,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store GR64:$src, addr:$dst)], IIC_MOV_MEM>; +} // SchedRW // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so // that they can be used for copying and storing h registers, which can't be @@ -1101,34 +1125,37 @@ let isCodeGenOnly = 1 in { let neverHasSideEffects = 1 in def MOV8rr_NOREX : I<0x88, MRMDestReg, (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src), - "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], IIC_MOV>, + Sched<[WriteMove]>; let mayStore = 1 in def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteStore]>; let mayLoad = 1, neverHasSideEffects = 1, canFoldAsLoad = 1, isReMaterializable = 1 in def MOV8rm_NOREX : I<0x8A, MRMSrcMem, (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src), "mov{b}\t{$src, $dst|$dst, $src} # NOREX", [], - IIC_MOV_MEM>; + IIC_MOV_MEM>, Sched<[WriteLoad]>; } // Condition code ops, incl. set if equal/not equal/... +let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", [(set EFLAGS, (X86sahf AH))], IIC_AHF>; let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], IIC_AHF>; // AH = flags - +} // SchedRW //===----------------------------------------------------------------------===// // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { +let SchedRW = [WriteALU] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))], IIC_BT_RR>, @@ -1139,13 +1166,14 @@ def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))], IIC_BT_RR>, TB; +} // SchedRW // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly // only for now. -let mayLoad = 1, hasSideEffects = 0 in { +let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", // [(X86bt (loadi16 addr:$src1), GR16:$src2), @@ -1166,6 +1194,7 @@ let mayLoad = 1, hasSideEffects = 0 in { >, TB; } +let SchedRW = [WriteALU] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))], @@ -1178,10 +1207,12 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))], IIC_BT_RI>, TB; +} // SchedRW // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. +let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) @@ -1194,8 +1225,10 @@ def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt (loadi64 addr:$src1), i64immSExt8:$src2))], IIC_BT_MI>, TB; +} // SchedRW let hasSideEffects = 0 in { +let SchedRW = [WriteALU] in { def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1203,8 +1236,9 @@ def BTC32rr : I<0xBB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1214,6 +1248,7 @@ def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1221,8 +1256,9 @@ def BTC32ri8 : Ii8<0xBA, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btc{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTC64ri8 : RIi8<0xBA, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1232,6 +1268,7 @@ def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTR16rr : I<0xB3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1239,8 +1276,9 @@ def BTR32rr : I<0xB3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTR64rr : RI<0xB3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1250,6 +1288,7 @@ def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1257,8 +1296,9 @@ def BTR32ri8 : Ii8<0xBA, MRM6r, (outs), (ins GR32:$src1, i32i8imm:$src2), "btr{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTR64ri8 : RIi8<0xBA, MRM6r, (outs), (ins GR64:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1268,6 +1308,7 @@ def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "btr{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, TB; } +let SchedRW = [WriteALU] in { def BTS16rr : I<0xAB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, OpSize, TB; @@ -1275,8 +1316,9 @@ def BTS32rr : I<0xAB, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; def BTS64rr : RI<0xAB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RR>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, OpSize, TB; @@ -1286,6 +1328,7 @@ def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MR>, TB; } +let SchedRW = [WriteALU] in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, OpSize, TB; @@ -1293,8 +1336,9 @@ def BTS32ri8 : Ii8<0xBA, MRM5r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bts{l}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; def BTS64ri8 : RIi8<0xBA, MRM5r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bts{q}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_RI>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", [], IIC_BTX_MI>, OpSize, TB; @@ -1315,7 +1359,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), // operand is referenced, the atomicity is ensured. multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, InstrItinClass itin> { - let Constraints = "$val = $dst" in { + let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in { def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr), !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"), @@ -1350,6 +1394,7 @@ multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag, defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap", IIC_XCHG_MEM>; // Swap between registers. +let SchedRW = [WriteALU] in { let Constraints = "$val = $dst" in { def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst), (ins GR8:$val, GR8:$src), "xchg{b}\t{$val, $src|$src, $val}", [], IIC_XCHG_REG>; @@ -1374,9 +1419,9 @@ def XCHG32ar64 : I<0x90, AddRegFrm, (outs), (ins GR32_NOAX:$src), Requires<[In64BitMode]>; def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src), "xchg{q}\t{$src, %rax|RAX, $src}", [], IIC_XCHG_REG>; +} // SchedRW - - +let SchedRW = [WriteALU] in { def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), @@ -1386,8 +1431,9 @@ def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), "xadd{l}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "xadd{q}\t{$src, $dst|$dst, $src}", [], IIC_XADD_REG>, TB; +} // SchedRW -let mayLoad = 1, mayStore = 1 in { +let mayLoad = 1, mayStore = 1, SchedRW = [WriteALULd, WriteRMW] in { def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "xadd{b}\t{$src, $dst|$dst, $src}", [], IIC_XADD_MEM>, TB; def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), @@ -1400,6 +1446,7 @@ def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), } +let SchedRW = [WriteALU] in { def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG8>, TB; @@ -1412,7 +1459,9 @@ def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src), def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src), "cmpxchg{q}\t{$src, $dst|$dst, $src}", [], IIC_CMPXCHG_REG>, TB; +} // SchedRW +let SchedRW = [WriteALULd, WriteRMW] in { let mayLoad = 1, mayStore = 1 in { def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "cmpxchg{b}\t{$src, $dst|$dst, $src}", [], @@ -1436,7 +1485,7 @@ let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst), "cmpxchg16b\t$dst", [], IIC_CMPXCHG_16B>, TB, Requires<[HasCmpxchg16b]>; - +} // SchedRW // Lock instruction prefix @@ -1459,17 +1508,21 @@ def REPNE_PREFIX : I<0xF2, RawFrm, (outs), (ins), "repne", []>; // String manipulation instructions +let SchedRW = [WriteMicrocoded] in { def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", [], IIC_LODS>; def LODSW : I<0xAD, RawFrm, (outs), (ins), "lodsw", [], IIC_LODS>, OpSize; def LODSD : I<0xAD, RawFrm, (outs), (ins), "lods{l|d}", [], IIC_LODS>; def LODSQ : RI<0xAD, RawFrm, (outs), (ins), "lodsq", [], IIC_LODS>; +} +let SchedRW = [WriteSystem] in { def OUTSB : I<0x6E, RawFrm, (outs), (ins), "outsb", [], IIC_OUTS>; def OUTSW : I<0x6F, RawFrm, (outs), (ins), "outsw", [], IIC_OUTS>, OpSize; def OUTSD : I<0x6F, RawFrm, (outs), (ins), "outs{l|d}", [], IIC_OUTS>; - +} // Flag instructions +let SchedRW = [WriteALU] in { def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", [], IIC_CLC>; def STC : I<0xF9, RawFrm, (outs), (ins), "stc", [], IIC_STC>; def CLI : I<0xFA, RawFrm, (outs), (ins), "cli", [], IIC_CLI>; @@ -1479,10 +1532,13 @@ def STD : I<0xFD, RawFrm, (outs), (ins), "std", [], IIC_STD>; def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", [], IIC_CMC>; def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; +} // Table lookup instructions -def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>; +def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, + Sched<[WriteLoad]>; +let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition // sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, @@ -1512,7 +1568,9 @@ def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, // sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[In32BitMode]>; +} // SchedRW +let SchedRW = [WriteSystem] in { // Check Array Index Against Bounds def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bound\t{$src, $dst|$dst, $src}", [], IIC_BOUND>, OpSize, @@ -1528,11 +1586,13 @@ def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src), def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "arpl\t{$src, $dst|$dst, $src}", [], IIC_ARPL_MEM>, Requires<[In32BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // MOVBE Instructions // let Predicates = [HasMOVBE] in { + let SchedRW = [WriteALULd] in { def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (bswap (loadi16 addr:$src)))], IIC_MOVBE>, @@ -1545,6 +1605,8 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bswap (loadi64 addr:$src)))], IIC_MOVBE>, T8; + } + let SchedRW = [WriteStore] in { def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src), "movbe{w}\t{$src, $dst|$dst, $src}", [(store (bswap GR16:$src), addr:$dst)], IIC_MOVBE>, @@ -1557,6 +1619,7 @@ let Predicates = [HasMOVBE] in { "movbe{q}\t{$src, $dst|$dst, $src}", [(store (bswap GR64:$src), addr:$dst)], IIC_MOVBE>, T8; + } } //===----------------------------------------------------------------------===// @@ -1575,6 +1638,21 @@ let Predicates = [HasRDRAND], Defs = [EFLAGS] in { } //===----------------------------------------------------------------------===// +// RDSEED Instruction +// +let Predicates = [HasRDSEED], Defs = [EFLAGS] in { + def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), + "rdseed{w}\t$dst", + [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize, TB; + def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), + "rdseed{l}\t$dst", + [(set GR32:$dst, EFLAGS, (X86rdseed))]>, TB; + def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), + "rdseed{q}\t$dst", + [(set GR64:$dst, EFLAGS, (X86rdseed))]>, TB; +} + +//===----------------------------------------------------------------------===// // LZCNT Instruction // let Predicates = [HasLZCNT], Defs = [EFLAGS] in { diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 127af6f7f9..49721df7c1 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -20,6 +20,7 @@ // MMX Multiclasses //===----------------------------------------------------------------------===// +let Sched = WriteVecALU in { def MMX_INTALU_ITINS : OpndItins< IIC_MMX_ALU_RR, IIC_MMX_ALU_RM >; @@ -35,11 +36,14 @@ def MMX_PHADDSUBW : OpndItins< def MMX_PHADDSUBD : OpndItins< IIC_MMX_PHADDSUBD_RR, IIC_MMX_PHADDSUBD_RM >; +} +let Sched = WriteVecIMul in def MMX_PMUL_ITINS : OpndItins< IIC_MMX_PMUL, IIC_MMX_PMUL >; +let Sched = WriteVecALU in { def MMX_PSADBW_ITINS : OpndItins< IIC_MMX_PSADBW, IIC_MMX_PSADBW >; @@ -47,11 +51,13 @@ def MMX_PSADBW_ITINS : OpndItins< def MMX_MISC_FUNC_ITINS : OpndItins< IIC_MMX_MISC_FUNC_MEM, IIC_MMX_MISC_FUNC_REG >; +} def MMX_SHIFT_ITINS : ShiftOpndItins< IIC_MMX_SHIFT_RR, IIC_MMX_SHIFT_RM, IIC_MMX_SHIFT_RI >; +let Sched = WriteShuffle in { def MMX_UNPCK_H_ITINS : OpndItins< IIC_MMX_UNPCK_H_RR, IIC_MMX_UNPCK_H_RM >; @@ -67,7 +73,9 @@ def MMX_PCK_ITINS : OpndItins< def MMX_PSHUF_ITINS : OpndItins< IIC_MMX_PSHUF, IIC_MMX_PSHUF >; +} // Sched +let Sched = WriteCvtF2I in { def MMX_CVT_PD_ITINS : OpndItins< IIC_MMX_CVT_PD_RR, IIC_MMX_CVT_PD_RM >; @@ -75,6 +83,7 @@ def MMX_CVT_PD_ITINS : OpndItins< def MMX_CVT_PS_ITINS : OpndItins< IIC_MMX_CVT_PS_RR, IIC_MMX_CVT_PS_RM >; +} let Constraints = "$src1 = $dst" in { // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. @@ -84,7 +93,8 @@ let Constraints = "$src1 = $dst" in { def irr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr> { + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>, + Sched<[itins.Sched]> { let isCommutable = Commutable; } def irm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), @@ -92,7 +102,7 @@ let Constraints = "$src1 = $dst" in { !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, @@ -101,17 +111,19 @@ let Constraints = "$src1 = $dst" in { def rr : MMXI<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>; + [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))], itins.rr>, + Sched<[WriteVecShift]>; def rm : MMXI<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId VR64:$src1, (bitconvert (load_mmx addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : MMXIi8<opc2, ImmForm, (outs VR64:$dst), (ins VR64:$src1, i32i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>; + [(set VR64:$dst, (IntId2 VR64:$src1, (i32 imm:$src2)))], itins.ri>, + Sched<[WriteVecShift]>; } } @@ -120,13 +132,14 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr, Intrinsic IntId64, OpndItins itins> { def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>; + [(set VR64:$dst, (IntId64 VR64:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 (bitconvert (memopmmx addr:$src))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// Binary MMX instructions requiring SSSE3. @@ -137,13 +150,15 @@ multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr, def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>; + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm64 : MMXSS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins VR64:$src1, i64mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), [(set VR64:$dst, (IntId64 VR64:$src1, - (bitconvert (memopmmx addr:$src2))))], itins.rm>; + (bitconvert (memopmmx addr:$src2))))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -164,9 +179,11 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, string asm, OpndItins itins, Domain d> { def irr : MMXPI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr, d>, + Sched<[itins.Sched]>; def irm : MMXPI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>; + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm, d>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -174,11 +191,11 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, PatFrag ld_frag, string asm, Domain d> { def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2), asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - IIC_DEFAULT, d>; + NoItinerary, d>; def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), asm, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - IIC_DEFAULT, d>; + NoItinerary, d>; } //===----------------------------------------------------------------------===// @@ -197,16 +214,17 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (scalar_to_vector GR32:$src)))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; let canFoldAsLoad = 1 in def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (scalar_to_vector (loadi32 addr:$src))))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>; let mayStore = 1 in def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), - "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>; + "movd\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOV_MM_RM>, + Sched<[WriteStore]>; // Low word of MMX to GPR. def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, @@ -214,16 +232,18 @@ def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, - (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>; + (MMX_X86movd2w (x86mmx VR64:$src)))], + IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; let neverHasSideEffects = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", - [], IIC_MMX_MOV_MM_RM>; + [], IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. +let SchedRW = [WriteMove] in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", @@ -237,6 +257,9 @@ let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; +} // SchedRW + +let SchedRW = [WriteLoad] in { let canFoldAsLoad = 1 in def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", @@ -246,7 +269,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (x86mmx VR64:$src), addr:$dst)], IIC_MMX_MOVQ_RM>; +} // SchedRW +let SchedRW = [WriteMove] in { def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, @@ -271,11 +296,12 @@ def MMX_MOVQ2FR64rr: MMXS2SIi8<0xD6, MRMSrcReg, (outs FR64:$dst), def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; +} // SchedRW def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)], - IIC_MMX_MOVQ_RM>; + IIC_MMX_MOVQ_RM>, Sched<[WriteStore]>; let AddedComplexity = 15 in // movd to MMX register zero-extends @@ -283,7 +309,7 @@ def MMX_MOVZDI2PDIrr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>; let AddedComplexity = 20 in def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), @@ -291,7 +317,7 @@ def MMX_MOVZDI2PDIrm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), [(set VR64:$dst, (x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))))], - IIC_MMX_MOV_MM_RM>; + IIC_MMX_MOV_MM_RM>, Sched<[WriteLoad]>; // Arithmetic Instructions defm MMX_PABSB : SS3I_unop_rm_int_mm<0x1C, "pabsb", int_x86_ssse3_pabs_b, @@ -491,14 +517,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg, "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w VR64:$src1, imm:$src2))], - IIC_MMX_PSHUF>; + IIC_MMX_PSHUF>, Sched<[WriteShuffle]>; def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src1, i8imm:$src2), "pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR64:$dst, (int_x86_sse_pshuf_w (load_mmx addr:$src1), imm:$src2))], - IIC_MMX_PSHUF>; + IIC_MMX_PSHUF>, Sched<[WriteShuffleLd]>; @@ -532,7 +558,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (int_x86_mmx_pextr_w VR64:$src1, (iPTR imm:$src2)))], - IIC_MMX_PEXTR>; + IIC_MMX_PEXTR>, Sched<[WriteShuffle]>; let Constraints = "$src1 = $dst" in { def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), @@ -540,7 +566,7 @@ let Constraints = "$src1 = $dst" in { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, GR32:$src2, (iPTR imm:$src3)))], - IIC_MMX_PINSRW>; + IIC_MMX_PINSRW>, Sched<[WriteShuffle]>; def MMX_PINSRWirmi : MMXIi8<0xC4, MRMSrcMem, (outs VR64:$dst), @@ -549,7 +575,7 @@ let Constraints = "$src1 = $dst" in { [(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1, (i32 (anyext (loadi16 addr:$src2))), (iPTR imm:$src3)))], - IIC_MMX_PINSRW>; + IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>; } // Mask creation @@ -570,6 +596,7 @@ def : Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), (x86mmx (MMX_MOVQ64rm addr:$src))>; // Misc. +let SchedRW = [WriteShuffle] in { let Uses = [EDI] in def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", @@ -580,6 +607,7 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)], IIC_MMX_MASKMOV>; +} // 64-bit bit convert. let Predicates = [HasSSE2] in { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 0979752757..384238741b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -16,6 +16,8 @@ class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { InstrItinClass rr = arg_rr; InstrItinClass rm = arg_rm; + // InstrSchedModel info. + X86FoldableSchedWrite Sched = WriteFAdd; } class SizeItins<OpndItins arg_s, OpndItins arg_d> { @@ -33,6 +35,7 @@ class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm, // scalar +let Sched = WriteFAdd in { def SSE_ALU_F32S : OpndItins< IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM >; @@ -40,11 +43,13 @@ def SSE_ALU_F32S : OpndItins< def SSE_ALU_F64S : OpndItins< IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM >; +} def SSE_ALU_ITINS_S : SizeItins< SSE_ALU_F32S, SSE_ALU_F64S >; +let Sched = WriteFMul in { def SSE_MUL_F32S : OpndItins< IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F64S_RM >; @@ -52,11 +57,13 @@ def SSE_MUL_F32S : OpndItins< def SSE_MUL_F64S : OpndItins< IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM >; +} def SSE_MUL_ITINS_S : SizeItins< SSE_MUL_F32S, SSE_MUL_F64S >; +let Sched = WriteFDiv in { def SSE_DIV_F32S : OpndItins< IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F64S_RM >; @@ -64,12 +71,14 @@ def SSE_DIV_F32S : OpndItins< def SSE_DIV_F64S : OpndItins< IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM >; +} def SSE_DIV_ITINS_S : SizeItins< SSE_DIV_F32S, SSE_DIV_F64S >; // parallel +let Sched = WriteFAdd in { def SSE_ALU_F32P : OpndItins< IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM >; @@ -77,11 +86,13 @@ def SSE_ALU_F32P : OpndItins< def SSE_ALU_F64P : OpndItins< IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM >; +} def SSE_ALU_ITINS_P : SizeItins< SSE_ALU_F32P, SSE_ALU_F64P >; +let Sched = WriteFMul in { def SSE_MUL_F32P : OpndItins< IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F64P_RM >; @@ -89,11 +100,13 @@ def SSE_MUL_F32P : OpndItins< def SSE_MUL_F64P : OpndItins< IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM >; +} def SSE_MUL_ITINS_P : SizeItins< SSE_MUL_F32P, SSE_MUL_F64P >; +let Sched = WriteFDiv in { def SSE_DIV_F32P : OpndItins< IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F64P_RM >; @@ -101,6 +114,7 @@ def SSE_DIV_F32P : OpndItins< def SSE_DIV_F64P : OpndItins< IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM >; +} def SSE_DIV_ITINS_P : SizeItins< SSE_DIV_F32P, SSE_DIV_F64P @@ -110,6 +124,7 @@ def SSE_BIT_ITINS_P : OpndItins< IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM >; +let Sched = WriteVecALU in { def SSE_INTALU_ITINS_P : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; @@ -117,7 +132,9 @@ def SSE_INTALU_ITINS_P : OpndItins< def SSE_INTALUQ_ITINS_P : OpndItins< IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM >; +} +let Sched = WriteVecIMul in def SSE_INTMUL_ITINS_P : OpndItins< IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM >; @@ -148,13 +165,15 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; } def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>; + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class @@ -169,14 +188,16 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>( !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, RC:$src2))], itins.rr>; + RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr)) - RC:$src1, mem_cpat:$src2))], itins.rm>; + RC:$src1, mem_cpat:$src2))], itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed - SSE 1 & 2 packed instructions class @@ -189,14 +210,16 @@ multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, + Sched<[itins.Sched]>; let mayLoad = 1 in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], - itins.rm, d>; + itins.rm, d>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class @@ -209,12 +232,14 @@ multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rr, IIC_DEFAULT, d>; + pat_rr, NoItinerary, d>, + Sched<[WriteVecLogic]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - pat_rm, IIC_DEFAULT, d>; + pat_rm, NoItinerary, d>, + Sched<[WriteVecLogicLd, ReadAfterLd]>; } //===----------------------------------------------------------------------===// @@ -345,7 +370,7 @@ let Predicates = [HasAVX] in { // Alias instructions that map fld0 to xorps for sse or vxorps for avx. // This is expanded by ExpandPostRAPseudos. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "", [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>; def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "", @@ -362,7 +387,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-zeros value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4f32 immAllZerosV))]>; } @@ -379,7 +404,7 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>; // at the rename stage without using any execution unit, so SET0PSY // and SET0PDY can be used for vector int instructions without penalty let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1, Predicates = [HasAVX] in { + isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in { def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "", [(set VR256:$dst, (v8f32 immAllZerosV))]>; } @@ -417,7 +442,7 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), // We set canFoldAsLoad because this can be converted to a constant-pool // load of an all-ones value if folding it would be beneficial. let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isPseudo = 1 in { + isPseudo = 1, SchedRW = [WriteZero] in { def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "", [(set VR128:$dst, (v4i32 immAllOnesV))]>; let Predicates = [HasAVX2] in @@ -444,14 +469,14 @@ multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, !strconcat(base_opc, asm_opr), [(set VR128:$dst, (vt (OpNode VR128:$src1, (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>; + IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; // For the disassembler let isCodeGenOnly = 1, hasSideEffects = 0 in def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), !strconcat(base_opc, asm_opr), - [], IIC_SSE_MOV_S_RR>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteMove]>; } multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, @@ -464,7 +489,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - VEX, VEX_LIG; + VEX, VEX_LIG, Sched<[WriteStore]>; // SSE1 & 2 let Constraints = "$src1 = $dst" in { defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr, @@ -473,7 +498,8 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt, def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; + [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, + Sched<[WriteStore]>; } // Loading from memory automatically zeroing upper bits. @@ -482,11 +508,11 @@ multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>, VEX, VEX_LIG; + IIC_SSE_MOV_S_RM>, VEX, VEX_LIG, Sched<[WriteLoad]>; def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (mem_pat addr:$src))], - IIC_SSE_MOV_S_RM>; + IIC_SSE_MOV_S_RM>, Sched<[WriteLoad]>; } defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss">, XS; @@ -745,11 +771,13 @@ multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>, + Sched<[WriteMove]>; let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; + [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>, + Sched<[WriteLoad]>; } defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, @@ -790,6 +818,7 @@ defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, TB, OpSize; +let SchedRW = [WriteStore] in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -822,9 +851,10 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>, VEX, VEX_L; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], @@ -880,6 +910,7 @@ def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), (VMOVUPDYmr addr:$dst, VR256:$src)>; +let SchedRW = [WriteStore] in { def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)], @@ -896,9 +927,10 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v2f64 VR128:$src), addr:$dst)], IIC_SSE_MOVU_P_MR>; +} // SchedRW // For disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -1009,7 +1041,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; @@ -1044,7 +1076,7 @@ let Predicates = [UseSSE1] in { // Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), "movaps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -1061,7 +1093,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper // bits are disregarded. FIXME: Set encoding to pseudo! -let canFoldAsLoad = 1, isReMaterializable = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { let isCodeGenOnly = 1 in { def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", @@ -1095,14 +1127,16 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode, [(set VR128:$dst, (psnode VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))], - itin, SSEPackedSingle>, TB; + itin, SSEPackedSingle>, TB, + Sched<[WriteShuffleLd, ReadAfterLd]>; def PDrm : PI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), !strconcat(base_opc, "d", asm_opr), [(set VR128:$dst, (v2f64 (pdnode VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))], - itin, SSEPackedDouble>, TB, OpSize; + itin, SSEPackedDouble>, TB, OpSize, + Sched<[WriteShuffleLd, ReadAfterLd]>; } @@ -1123,6 +1157,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -1143,6 +1178,7 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // Shuffle with VMOVLPS @@ -1222,6 +1258,7 @@ let AddedComplexity = 20 in { IIC_SSE_MOV_LH>; } +let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), @@ -1246,6 +1283,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), [(store (f64 (vector_extract (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; +} // SchedRW let Predicates = [HasAVX] in { // VMOVHPS patterns @@ -1296,14 +1334,14 @@ let AddedComplexity = 20 in { [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], IIC_SSE_MOV_LH>, - VEX_4V; + VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst", AddedComplexity = 20 in { def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), @@ -1311,13 +1349,13 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movhlps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; } let Predicates = [HasAVX] in { @@ -1352,22 +1390,27 @@ def SSE_CVT_PD : OpndItins< IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_PS : OpndItins< IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM >; +let Sched = WriteCvtI2F in def SSE_CVT_Scalar : OpndItins< IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_32 : OpndItins< IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SS2SI_64 : OpndItins< IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM >; +let Sched = WriteCvtF2I in def SSE_CVT_SD2SI : OpndItins< IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM >; @@ -1377,10 +1420,10 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, [(set DstRC:$dst, (OpNode SrcRC:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -1388,10 +1431,10 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, OpndItins itins> { let neverHasSideEffects = 1 in { def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, - [], itins.rr, d>; + [], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, - [], itins.rm, d>; + [], itins.rm, d>, Sched<[itins.Sched.Folded]>; } } @@ -1399,11 +1442,13 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, X86MemOperand x86memop, string asm> { let neverHasSideEffects = 1 in { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2F]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src), - !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; + !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, + Sched<[WriteCvtI2FLd, ReadAfterLd]>; } // neverHasSideEffects = 1 } @@ -1534,10 +1579,12 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, string asm, OpndItins itins> { def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>; + [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, + Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>; + [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, @@ -1549,14 +1596,14 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins DstRC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, @@ -1701,13 +1748,15 @@ let neverHasSideEffects = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG; + IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG; + XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, @@ -1716,26 +1765,28 @@ def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>, def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))], - IIC_SSE_CVT_Scalar_RR>; + IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>; def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))], IIC_SSE_CVT_Scalar_RM>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, @@ -1743,13 +1794,15 @@ def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg, "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSD2SSrm: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, sse_load_f64:$src2))], - IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert scalar single to scalar double @@ -1759,13 +1812,15 @@ def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RR>, - XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG; + XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG, + Sched<[WriteCvtF2F]>; let mayLoad = 1 in def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], IIC_SSE_CVT_Scalar_RM>, - XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>; + XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } def : Pat<(f64 (fextend FR32:$src)), @@ -1784,12 +1839,12 @@ def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))], IIC_SSE_CVT_Scalar_RR>, XS, - Requires<[UseSSE2]>; + Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>; def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (extloadf32 addr:$src))], IIC_SSE_CVT_Scalar_RM>, XS, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>; // extload f32 -> f64. This matches load+fextend because we have a hack in // the isel (PreprocessForFPConvert) that can introduce loads after dag @@ -1806,57 +1861,61 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2F]>; def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>; + IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], - IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2F]>; def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), "cvtss2sd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], - IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>; + IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, + Sched<[WriteCvtF2FLd, ReadAfterLd]>; } // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; // Convert Packed Double FP to Packed DW Integers @@ -1867,7 +1926,7 @@ let Predicates = [HasAVX] in { def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - VEX; + VEX, Sched<[WriteCvtF2I]>; // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", @@ -1875,18 +1934,20 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX; + (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX, + Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L; + (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, + Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>, - VEX, VEX_L; + VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; } @@ -1895,11 +1956,11 @@ def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix @@ -1907,32 +1968,33 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>, VEX; + IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX; + IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], - IIC_SSE_CVT_PS_RR>, VEX, VEX_L; + IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>, VEX, VEX_L; + IIC_SSE_CVT_PS_RM>, VEX, VEX_L, + Sched<[WriteCvtF2ILd]>; def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], - IIC_SSE_CVT_PS_RR>; + IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PS_RM>; + IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), @@ -1982,7 +2044,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. @@ -1995,19 +2057,19 @@ def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; @@ -2021,12 +2083,13 @@ let Predicates = [HasAVX] in { def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, + Sched<[WriteCvtF2ILd]>; // Convert packed single to packed double let Predicates = [HasAVX] in { @@ -2034,32 +2097,32 @@ let Predicates = [HasAVX] in { def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX; + IIC_SSE_CVT_PD_RR>, TB, VEX, Sched<[WriteCvtF2F]>; def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX; + IIC_SSE_CVT_PD_RM>, TB, VEX, Sched<[WriteCvtF2FLd]>; def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; } let Predicates = [UseSSE2] in { def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], - IIC_SSE_CVT_PD_RR>, TB; + IIC_SSE_CVT_PD_RR>, TB, Sched<[WriteCvtF2F]>; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], - IIC_SSE_CVT_PD_RM>, TB; + IIC_SSE_CVT_PD_RM>, TB, Sched<[WriteCvtF2FLd]>; } // Convert Packed DW Integers to Packed Double FP @@ -2067,30 +2130,33 @@ let Predicates = [HasAVX] in { let neverHasSideEffects = 1, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", - []>, VEX; + []>, VEX, Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX; + (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX, + Sched<[WriteCvtI2F]>; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, (int_x86_avx_cvtdq2_pd_256 - (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L; + (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L, + Sched<[WriteCvtI2FLd]>; def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L; + (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L, + Sched<[WriteCvtI2F]>; } let neverHasSideEffects = 1, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>; // AVX 256-bit register conversion intrinsics let Predicates = [HasAVX] in { @@ -2107,7 +2173,7 @@ let Predicates = [HasAVX] in { def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>, VEX; + IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", @@ -2116,31 +2182,31 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX; + IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], - IIC_SSE_CVT_PD_RR>, VEX, VEX_L; + IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>, VEX, VEX_L; + IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], - IIC_SSE_CVT_PD_RR>; + IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], - IIC_SSE_CVT_PD_RM>; + IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; // AVX 256-bit register conversion intrinsics @@ -2193,22 +2259,24 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, def rr : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (OpNode (VT RC:$src1), (ld_frag addr:$src2), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RR>; + IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [], - IIC_SSE_ALU_F32S_RM>; + IIC_SSE_ALU_F32S_RM>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2241,12 +2309,14 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, (ins VR128:$src1, VR128:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, VR128:$src, imm:$cc))], - itins.rr>; + itins.rr>, + Sched<[itins.Sched]>; def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src, CC:$cc), asm, [(set VR128:$dst, (Int VR128:$src1, (load addr:$src), imm:$cc))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Aliases to match intrinsics which expect XMM operand(s). @@ -2276,12 +2346,14 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], - IIC_SSE_COMIS_RR, d>; + IIC_SSE_COMIS_RR, d>, + Sched<[WriteFAdd]>; def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode (vt RC:$src1), (ld_frag addr:$src2)))], - IIC_SSE_COMIS_RM, d>; + IIC_SSE_COMIS_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } let Defs = [EFLAGS] in { @@ -2338,20 +2410,23 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, def rri : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], - IIC_SSE_CMPP_RR, d>; + IIC_SSE_CMPP_RR, d>, + Sched<[WriteFAdd]>; def rmi : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], - IIC_SSE_CMPP_RM, d>; + IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let neverHasSideEffects = 1 in { def rri_alt : PIi8<0xC2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RR, d>; + asm_alt, [], IIC_SSE_CMPP_RR, d>, Sched<[WriteFAdd]>; def rmi_alt : PIi8<0xC2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_CMPP_RM, d>; + asm_alt, [], IIC_SSE_CMPP_RM, d>, + Sched<[WriteFAddLd, ReadAfterLd]>; } } @@ -2427,12 +2502,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$src3), asm, [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; + (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, + Sched<[WriteShuffle]>; } defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, @@ -2516,13 +2593,14 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, (outs RC:$dst), (ins RC:$src1, RC:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, Sched<[WriteShuffle]>; def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), asm, [(set RC:$dst, (vt (OpNode RC:$src1, (mem_frag addr:$src2))))], - IIC_SSE_UNPCK, d>; + IIC_SSE_UNPCK, d>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, @@ -2613,10 +2691,11 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, Domain d> { def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; + [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, + Sched<[WriteVecLogic]>; def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], - IIC_SSE_MOVMSK, d>, REX_W; + IIC_SSE_MOVMSK, d>, REX_W, Sched<[WriteVecLogic]>; } let Predicates = [HasAVX] in { @@ -2644,18 +2723,18 @@ let Predicates = [HasAVX] in { // Assembler Only def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX; + SSEPackedSingle>, TB, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX; + OpSize, VEX, Sched<[WriteVecLogic]>; def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, - SSEPackedSingle>, TB, VEX, VEX_L; + SSEPackedSingle>, TB, VEX, VEX_L, Sched<[WriteVecLogic]>; def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, SSEPackedDouble>, TB, - OpSize, VEX, VEX_L; + OpSize, VEX, VEX_L, Sched<[WriteVecLogic]>; } defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", @@ -2693,7 +2772,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; + [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -2701,7 +2781,8 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))], - itins.rm>; + itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -2967,6 +3048,7 @@ let isCodeGenOnly = 1 in { /// /// And, we have a special variant form for a full-vector intrinsic form. +let Sched = WriteFSqrt in { def SSE_SQRTP : OpndItins< IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM >; @@ -2974,7 +3056,9 @@ def SSE_SQRTP : OpndItins< def SSE_SQRTS : OpndItins< IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM >; +} +let Sched = WriteFRcp in { def SSE_RCPP : OpndItins< IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM >; @@ -2982,6 +3066,7 @@ def SSE_RCPP : OpndItins< def SSE_RCPS : OpndItins< IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM >; +} /// sse1_fp_unop_s - SSE1 unops in scalar form. multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, @@ -2991,24 +3076,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3016,13 +3103,15 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>; + [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_s_rw - SSE1 unops where vector form has a read-write operand. @@ -3033,24 +3122,26 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR32:$src1, FR32:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat("v", OpcodeStr, "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]>; + [(set FR32:$dst, (OpNode FR32:$src))]>, Sched<[itins.Sched]>; // For scalar unary operations, fold a load into the operation // only in OptForSize mode. It eliminates an instruction, but it also // eliminates a whole-register clobber (the load), so it introduces a @@ -3058,17 +3149,17 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS, - Requires<[UseSSE1, OptForSize]>; + Requires<[UseSSE1, OptForSize]>, Sched<[itins.Sched.Folded]>; let Constraints = "$src1 = $dst" in { def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rr>; + [], itins.rr>, Sched<[itins.Sched]>; let mayLoad = 1, hasSideEffects = 0 in def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [], itins.rm>; + [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3080,30 +3171,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. @@ -3115,33 +3208,33 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int VR256:$src))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (V8F32Int (memopv8f32 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int VR128:$src))], - itins.rr>; + itins.rr>, Sched<[itins.Sched]>; def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_s - SSE2 unops in scalar form. @@ -3152,35 +3245,40 @@ let Predicates = [HasAVX], hasSideEffects = 0 in { (ins FR64:$src1, FR64:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, Sched<[itins.Sched]>; let mayLoad = 1 in { def V#NAME#SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; def V#NAME#SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), !strconcat("v", OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, VEX_4V, VEX_LIG; + []>, VEX_4V, VEX_LIG, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>; + [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>, + Sched<[itins.Sched]>; // See the comments in sse1_fp_unop_s for why this is OptForSize. def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD, - Requires<[UseSSE2, OptForSize]>; + Requires<[UseSSE2, OptForSize]>, Sched<[itins.Sched.Folded]>; def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>; + [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>, + Sched<[itins.Sched]>; def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>; + [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>, + Sched<[itins.Sched.Folded]>; } /// sse2_fp_unop_p - SSE2 unops in vector forms. @@ -3191,30 +3289,32 @@ let Predicates = [HasAVX] in { !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], - itins.rr>, VEX; + itins.rr>, VEX, Sched<[itins.Sched]>; def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], - itins.rm>, VEX; + itins.rm>, VEX, Sched<[itins.Sched.Folded]>; def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], - itins.rr>, VEX, VEX_L; + itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat("v", OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))], - itins.rm>, VEX, VEX_L; + itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; } def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>; + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, + Sched<[itins.Sched]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>; + [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, + Sched<[itins.Sched.Folded]>; } // Square root. @@ -3305,52 +3405,48 @@ let Predicates = [UseSSE1] in { //===----------------------------------------------------------------------===// let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2i64 VR128:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX; - - def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; - - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4i64 VR256:$src), - addr:$dst)], - IIC_SSE_MOVNT>, VEX, VEX_L; -} +let SchedRW = [WriteStore] in { +def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; +def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +let ExeDomain = SSEPackedInt in +def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2i64 VR128:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX; + +def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; +let ExeDomain = SSEPackedInt in +def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4i64 VR256:$src), + addr:$dst)], + IIC_SSE_MOVNT>, VEX, VEX_L; -let AddedComplexity = 400 in { // Prefer non-temporal versions def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], @@ -3366,9 +3462,6 @@ def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], IIC_SSE_MOVNT>; -def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), - (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; - // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -3380,14 +3473,21 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), [(nontemporalstore (i64 GR64:$src), addr:$dst)], IIC_SSE_MOVNT>, TB, Requires<[HasSSE2]>; -} +} // SchedRW = [WriteStore] + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; + +def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), + (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; +} // AddedComplexity //===----------------------------------------------------------------------===// // SSE 1 & 2 - Prefetch and memory fence //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1] in { +let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; @@ -3402,6 +3502,8 @@ def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), IIC_SSE_PREFETCH>, TB; } +// FIXME: How should these memory instructions be modeled? +let SchedRW = [WriteLoad] in { // Flush cache def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], @@ -3421,6 +3523,7 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins), def MFENCE : I<0xAE, MRM_F0, (outs), (ins), "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>, TB, Requires<[HasSSE2]>; +} // SchedRW def : Pat<(X86SFence), (SFENCE)>; def : Pat<(X86LFence), (LFENCE)>; @@ -3432,17 +3535,17 @@ def : Pat<(X86MFence), (MFENCE)>; def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>, VEX; + IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>; def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>, VEX; + IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>; def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)], - IIC_SSE_LDMXCSR>; + IIC_SSE_LDMXCSR>, Sched<[WriteLoad]>; def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)], - IIC_SSE_STMXCSR>; + IIC_SSE_STMXCSR>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // SSE2 - Move Aligned/Unaligned Packed Integer Instructions @@ -3450,7 +3553,7 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let neverHasSideEffects = 1 in { +let neverHasSideEffects = 1, SchedRW = [WriteMove] in { def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, VEX; @@ -3466,7 +3569,7 @@ def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), } // For Disassembler -let isCodeGenOnly = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, @@ -3484,7 +3587,7 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, VEX; @@ -3501,7 +3604,7 @@ let Predicates = [HasAVX] in { } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, neverHasSideEffects = 1, SchedRW = [WriteStore] in { def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, @@ -3520,6 +3623,7 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), } } +let SchedRW = [WriteMove] in { let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; @@ -3538,9 +3642,10 @@ def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; } +} // SchedRW let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, - neverHasSideEffects = 1 in { + neverHasSideEffects = 1, SchedRW = [WriteLoad] in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], @@ -3552,7 +3657,7 @@ def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), XS, Requires<[UseSSE2]>; } -let mayStore = 1 in { +let mayStore = 1, SchedRW = [WriteStore] in { def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], @@ -3580,6 +3685,7 @@ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecIMul in def SSE_PMADD : OpndItins< IIC_SSE_PMADD, IIC_SSE_PMADD >; @@ -3598,14 +3704,15 @@ multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], - itins.rm>; + itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, @@ -3639,20 +3746,22 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], - itins.rr>; + itins.rr>, Sched<[WriteVecShift]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, i128mem:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode RC:$src1, - (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>; + (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>, + Sched<[WriteVecShiftLd, ReadAfterLd]>; def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>; + [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>, + Sched<[WriteVecShift]>; } /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3667,14 +3776,16 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>; + [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, + Sched<[itins.Sched]>; def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), - (bitconvert (memop_frag addr:$src2)))))]>; + (bitconvert (memop_frag addr:$src2)))))]>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } // ExeDomain = SSEPackedInt @@ -3779,7 +3890,7 @@ defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3825,7 +3936,7 @@ defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 256-bit logical shifts. def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), @@ -3871,7 +3982,7 @@ defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, SSE_INTSHIFT_ITINS_P>; -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { // 128-bit logical shifts. def PSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -3966,14 +4077,15 @@ let Predicates = [HasAVX] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX; + IIC_SSE_PSHUF>, VEX, Sched<[WriteShuffle]>; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX2] in { @@ -3983,14 +4095,15 @@ let Predicates = [HasAVX2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>, VEX, VEX_L; + IIC_SSE_PSHUF>, VEX, VEX_L, Sched<[WriteShuffle]>; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), !strconcat("v", OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode (bitconvert (memopv4i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, VEX, VEX_L, + Sched<[WriteShuffleLd]>; } let Predicates = [UseSSE2] in { @@ -4000,14 +4113,15 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], - IIC_SSE_PSHUF>; + IIC_SSE_PSHUF>, Sched<[WriteShuffle]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), - (i8 imm:$src2))))], IIC_SSE_PSHUF>; + (i8 imm:$src2))))], IIC_SSE_PSHUF>, + Sched<[WriteShuffleLd]>; } } } // ExeDomain = SSEPackedInt @@ -4043,7 +4157,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, Sched<[WriteShuffle]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -4052,7 +4166,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, [(set VR128:$dst, (OpNode VR128:$src1, (bc_frag (memopv2i64 addr:$src2))))], - IIC_SSE_UNPCK>; + IIC_SSE_UNPCK>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, @@ -4060,12 +4175,14 @@ multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; + [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>, + Sched<[WriteShuffle]>; def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (OpNode VR256:$src1, - (bc_frag (memopv4i64 addr:$src2))))]>; + (bc_frag (memopv4i64 addr:$src2))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4142,7 +4259,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; + (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffle]>; def rmi : Ii8<0xC4, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i16mem:$src2, i32i8imm:$src3), @@ -4151,7 +4269,8 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> { "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), - imm:$src3))], IIC_SSE_PINSRW>; + imm:$src3))], IIC_SSE_PINSRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; } // Extract @@ -4160,12 +4279,14 @@ def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))]>, TB, OpSize, VEX; + imm:$src2))]>, TB, OpSize, VEX, + Sched<[WriteShuffle]>; def PEXTRWri : PDIi8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), - imm:$src2))], IIC_SSE_PEXTRW>; + imm:$src2))], IIC_SSE_PEXTRW>, + Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert let Predicates = [HasAVX] in { @@ -4173,7 +4294,7 @@ let Predicates = [HasAVX] in { def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, TB, OpSize, VEX_4V; + []>, TB, OpSize, VEX_4V, Sched<[WriteShuffle]>; } let Constraints = "$src1 = $dst" in @@ -4185,7 +4306,7 @@ let Constraints = "$src1 = $dst" in // SSE2 - Packed Mask Creation //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -4213,7 +4334,7 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), // SSE2 - Conditional Store //===---------------------------------------------------------------------===// -let ExeDomain = SSEPackedInt in { +let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), @@ -4252,41 +4373,42 @@ def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteMove]>; def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; + (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, + Sched<[WriteMove]>; def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; //===---------------------------------------------------------------------===// // Move Int Doubleword to Single Scalar @@ -4294,22 +4416,22 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteMove]>; def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int to Packed Double Int @@ -4317,26 +4439,29 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, + Sched<[WriteMove]>; def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - VEX; + VEX, Sched<[WriteLoad]>; def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), - (iPTR 0)))], IIC_SSE_MOVD_ToGP>; + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, + Sched<[WriteMove]>; def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; //===---------------------------------------------------------------------===// // Move Packed Doubleword Int first element to Doubleword Int // +let SchedRW = [WriteMove] in { def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "vmov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), @@ -4349,6 +4474,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; +} //SchedRW //===---------------------------------------------------------------------===// // Bitcast FR64 <-> GR64 @@ -4357,28 +4483,28 @@ let Predicates = [HasAVX] in def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, - VEX; + VEX, Sched<[WriteLoad]>; def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Move Scalar Single to Double Int @@ -4386,23 +4512,24 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>, VEX; + IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, VEX; + IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))], - IIC_SSE_MOVD_ToGP>; + IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; //===---------------------------------------------------------------------===// // Patterns and instructions to describe movd/movq to XMM register zero-extends // +let SchedRW = [WriteMove] in { let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4428,8 +4555,9 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), (v2i64 (scalar_to_vector GR64:$src)))))], IIC_SSE_MOVDQ>; } +} // SchedRW -let AddedComplexity = 20 in { +let AddedComplexity = 20, SchedRW = [WriteLoad] in { def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4442,7 +4570,7 @@ def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), (v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))))], IIC_SSE_MOVDQ>; -} +} // AddedComplexity, SchedRW let Predicates = [HasAVX] in { // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. @@ -4491,6 +4619,8 @@ def : InstAlias<"movq\t{$src, $dst|$dst, $src}", //===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int // + +let SchedRW = [WriteLoad] in { def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4502,10 +4632,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (scalar_to_vector (loadi64 addr:$src))))], IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix +} // SchedRW //===---------------------------------------------------------------------===// // Move Packed Quadword Int to Quadword Int // +let SchedRW = [WriteStore] in { def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -4516,17 +4648,19 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), [(store (i64 (vector_extract (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; +} // SchedRW //===---------------------------------------------------------------------===// // Store / copy lower 64-bits of a XMM register. // def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX, + Sched<[WriteStore]>; def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], - IIC_SSE_MOVDQ>; + IIC_SSE_MOVDQ>, Sched<[WriteStore]>; let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4535,7 +4669,7 @@ def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, VEX, Requires<[HasAVX]>; + XS, VEX, Requires<[HasAVX]>, Sched<[WriteLoad]>; let AddedComplexity = 20 in def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), @@ -4544,7 +4678,7 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), (v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))))], IIC_SSE_MOVDQ>, - XS, Requires<[UseSSE2]>; + XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; let Predicates = [HasAVX], AddedComplexity = 20 in { def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), @@ -4574,6 +4708,7 @@ def : Pat<(v4i64 (X86vzload addr:$src)), // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. // +let SchedRW = [WriteVecLogic] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4586,7 +4721,9 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], IIC_SSE_MOVQ_RR>, XS, Requires<[UseSSE2]>; +} // SchedRW +let SchedRW = [WriteVecLogicLd] in { let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -4602,6 +4739,7 @@ def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), IIC_SSE_MOVDQ>, XS, Requires<[UseSSE2]>; } +} // SchedRW let AddedComplexity = 20 in { let Predicates = [HasAVX] in { @@ -4619,6 +4757,7 @@ let AddedComplexity = 20 in { } // Instructions to match in the assembler +let SchedRW = [WriteMove] in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; @@ -4629,16 +4768,19 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, VEX, VEX_W; +} // SchedRW // Instructions for the disassembler // xr = XMM register // xm = mem64 +let SchedRW = [WriteMove] in { let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; +} // SchedRW //===---------------------------------------------------------------------===// // SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP @@ -4649,11 +4791,11 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (vt (OpNode RC:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (OpNode (mem_frag addr:$src)))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4709,25 +4851,27 @@ multiclass sse3_replicate_dfp<string OpcodeStr> { let neverHasSideEffects = 1 in def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [], IIC_SSE_MOV_LH>; + [], IIC_SSE_MOV_LH>, Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (X86Movddup (scalar_to_vector (loadf64 addr:$src)))))], - IIC_SSE_MOV_LH>; + IIC_SSE_MOV_LH>, Sched<[WriteShuffleLd]>; } // FIXME: Merge with above classe when there're patterns for the ymm version multiclass sse3_replicate_dfp_y<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>; + [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>, + Sched<[WriteShuffle]>; def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>; + (scalar_to_vector (loadf64 addr:$src)))))]>, + Sched<[WriteShuffleLd]>; } let Predicates = [HasAVX] in { @@ -4775,6 +4919,7 @@ let Predicates = [UseSSE3] in { // SSE3 - Move Unaligned Integer //===---------------------------------------------------------------------===// +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", @@ -4788,6 +4933,7 @@ def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "lddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], IIC_SSE_LDDQU>; +} //===---------------------------------------------------------------------===// // SSE3 - Arithmetic @@ -4801,13 +4947,15 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>, + Sched<[itins.Sched]>; def rm : I<0xD0, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>; + [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rr>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4844,14 +4992,15 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { @@ -4859,14 +5008,15 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, + Sched<[WriteFAdd]>; def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], - IIC_SSE_HADDSUB_RM>; + IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; } let Predicates = [HasAVX] in { @@ -4915,7 +5065,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), @@ -4923,7 +5073,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, [(set VR128:$dst, (IntId128 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, - OpSize; + OpSize, Sched<[WriteVecALULd]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. @@ -4933,14 +5083,15 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 VR256:$src))]>, - OpSize; + OpSize, Sched<[WriteVecALU]>; def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (IntId256 - (bitconvert (memopv4i64 addr:$src))))]>, OpSize; + (bitconvert (memopv4i64 addr:$src))))]>, OpSize, + Sched<[WriteVecALULd]>; } let Predicates = [HasAVX] in { @@ -4972,6 +5123,7 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// +let Sched = WriteVecALU in { def SSE_PHADDSUBD : OpndItins< IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM >; @@ -4981,12 +5133,16 @@ def SSE_PHADDSUBSW : OpndItins< def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; +} +let Sched = WriteShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; +let Sched = WriteVecALU in def SSE_PSIGN : OpndItins< IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM >; +let Sched = WriteVecIMul in def SSE_PMULHRSW : OpndItins< IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW >; @@ -5003,7 +5159,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), !if(Is2Addr, @@ -5011,7 +5167,8 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set RC:$dst, (OpVT (OpNode RC:$src1, - (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; + (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } /// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. @@ -5025,7 +5182,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize; + OpSize, Sched<[itins.Sched]>; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !if(Is2Addr, @@ -5033,7 +5190,8 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; + (bitconvert (memopv2i64 addr:$src2))))]>, OpSize, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, @@ -5175,7 +5333,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), @@ -5183,7 +5341,7 @@ multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), - [], IIC_SSE_PALIGNR>, OpSize; + [], IIC_SSE_PALIGNR>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5193,13 +5351,13 @@ multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffle]>; let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, OpSize; + []>, OpSize, Sched<[WriteShuffleLd, ReadAfterLd]>; } } @@ -5247,6 +5405,7 @@ def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), // SSSE3 - Thread synchronization //===---------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let usesCustomInserter = 1 in { def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3), [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>, @@ -5260,6 +5419,7 @@ let Uses = [ECX, EAX] in def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>, TB, Requires<[HasSSE3]>; +} // SchedRW def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>; def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>; @@ -6679,7 +6839,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], - IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2, RC:$src3), @@ -6688,7 +6848,7 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, [(set RC:$dst, (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)), RC:$src3))], - IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + NoItinerary, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; } let Predicates = [HasAVX] in { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 1185941d34..5b6298b541 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -15,7 +15,7 @@ let Defs = [EFLAGS] in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, CL}", @@ -62,9 +62,10 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t$dst", [], IIC_SR>; } // hasSideEffects = 0 } // isConvertibleToThreeAddress = 1 -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { // FIXME: Why do we need an explicit "Uses = [CL]" when the instr has a pattern // using CL? let Uses = [CL] in { @@ -118,8 +119,9 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t$dst", [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, CL}", @@ -163,9 +165,10 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst", [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t{%cl, $dst|$dst, CL}", @@ -216,8 +219,9 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t$dst", [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, CL}", @@ -273,9 +277,10 @@ def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst", [(set GR64:$dst, (sra GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t{%cl, $dst|$dst, CL}", @@ -330,13 +335,14 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst), "sar{q}\t$dst", [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW //===----------------------------------------------------------------------===// // Rotate instructions //===----------------------------------------------------------------------===// let hasSideEffects = 0 in { -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", [], IIC_SR>; def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), @@ -405,6 +411,7 @@ def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), } // Constraints = "$src = $dst" +let SchedRW = [WriteShiftLd, WriteRMW] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", [], IIC_SR>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), @@ -458,9 +465,10 @@ def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>; } +} // SchedRW } // hasSideEffects = 0 -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), @@ -512,8 +520,9 @@ def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t{%cl, $dst|$dst, CL}", @@ -568,8 +577,9 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, CL}", @@ -620,8 +630,9 @@ def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))], IIC_SR>; -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t{%cl, $dst|$dst, CL}", @@ -676,13 +687,14 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), "ror{q}\t$dst", [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)], IIC_SR>; +} // SchedRW //===----------------------------------------------------------------------===// // Double shift instructions (generalizations of rotate) //===----------------------------------------------------------------------===// -let Constraints = "$src1 = $dst" in { +let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { let Uses = [CL] in { def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), @@ -765,8 +777,9 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; } -} // Constraints = "$src = $dst" +} // Constraints = "$src = $dst", SchedRW +let SchedRW = [WriteShiftLd, WriteRMW] in { let Uses = [CL] in { def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2), "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}", @@ -840,6 +853,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, (i8 imm:$src3)), addr:$dst)], IIC_SHD64_MEM_IM>, TB; +} // SchedRW } // Defs = [EFLAGS] @@ -857,12 +871,12 @@ multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TAXD, VEX; + []>, TAXD, VEX, Sched<[WriteShift]>; let mayLoad = 1 in def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, i8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, TAXD, VEX; + []>, TAXD, VEX, Sched<[WriteShiftLd]>; } } @@ -870,11 +884,17 @@ multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> { let neverHasSideEffects = 1 in { def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX_4VOp3; + VEX_4VOp3, Sched<[WriteShift]>; let mayLoad = 1 in def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - VEX_4VOp3; + VEX_4VOp3, + Sched<[WriteShiftLd, + // x86memop:$src1 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC:$src1 + ReadAfterLd]>; } } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 3caa1b538c..053417ccde 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +let SchedRW = [WriteSystem] in { let Defs = [RAX, RDX] in def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)], IIC_RDTSC>, TB; @@ -35,6 +36,7 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))], IIC_INT3>; +} // SchedRW def : Pat<(debugtrap), (INT3)>; @@ -43,6 +45,7 @@ def : Pat<(debugtrap), // FIXME: This doesn't work because InstAlias can't match immediate constants. //def : InstAlias<"int\t$3", (INT3)>; +let SchedRW = [WriteSystem] in { def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -65,11 +68,13 @@ def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize; def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>; def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, Requires<[In64BitMode]>; +} // SchedRW //===----------------------------------------------------------------------===// // Input/Output Instructions. // +let SchedRW = [WriteSystem] in { let Defs = [AL], Uses = [DX] in def IN8rr : I<0xEC, RawFrm, (outs), (ins), "in{b}\t{%dx, %al|AL, DX}", [], IIC_IN_RR>; @@ -113,10 +118,12 @@ def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), def IN8 : I<0x6C, RawFrm, (outs), (ins), "ins{b}", [], IIC_INS>; def IN16 : I<0x6D, RawFrm, (outs), (ins), "ins{w}", [], IIC_INS>, OpSize; def IN32 : I<0x6D, RawFrm, (outs), (ins), "ins{l}", [], IIC_INS>; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from debug registers +let SchedRW = [WriteSystem] in { def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_DR>, TB; def MOV64rd : I<0x21, MRMDestReg, (outs GR64:$dst), (ins DEBUG_REG:$src), @@ -126,10 +133,12 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_DR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Moves to and from control registers +let SchedRW = [WriteSystem] in { def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_CR>, TB; def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src), @@ -139,6 +148,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_CR_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Segment override instruction prefixes @@ -155,6 +165,7 @@ def GS_PREFIX : I<0x65, RawFrm, (outs), (ins), "gs", []>; // Moves to and from segment registers. // +let SchedRW = [WriteMove] in { def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src), "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV_REG_SR>, OpSize; def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src), @@ -182,10 +193,12 @@ def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src), "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; def MOV64sm : RI<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV_SR_MEM>; +} // SchedRW //===----------------------------------------------------------------------===// // Segmentation support instructions. +let SchedRW = [WriteSystem] in { def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB; def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), @@ -347,10 +360,12 @@ def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", [], IIC_VERW_MEM>, TB; def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", [], IIC_VERW_REG>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Descriptor-table support instructions +let SchedRW = [WriteSystem] in { def SGDT16m : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), "sgdt{w}\t$dst", [], IIC_SGDT>, TB, OpSize, Requires<[In32BitMode]>; def SGDTm : I<0x01, MRM0m, (outs opaque48mem:$dst), (ins), @@ -385,9 +400,11 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src), "lldt{w}\t$src", [], IIC_LLDT_REG>, TB; def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), "lldt{w}\t$src", [], IIC_LLDT_MEM>, TB; - +} // SchedRW + //===----------------------------------------------------------------------===// // Specialized register support +let SchedRW = [WriteSystem] in { def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; @@ -410,14 +427,18 @@ def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src), "lmsw{w}\t$src", [], IIC_LMSW_REG>, TB; def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", [], IIC_CPUID>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // Cache instructions +let SchedRW = [WriteSystem] in { def INVD : I<0x08, RawFrm, (outs), (ins), "invd", [], IIC_INVD>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; +} // SchedRW //===----------------------------------------------------------------------===// // XSAVE instructions +let SchedRW = [WriteSystem] in { let Defs = [RDX, RAX], Uses = [RCX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; @@ -438,6 +459,7 @@ let Uses = [RDX, RAX] in { def XSAVEOPT64 : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), "xsaveoptq\t$dst", []>, TB, REX_W, Requires<[In64BitMode]>; } +} // SchedRW //===----------------------------------------------------------------------===// // VIA PadLock crypto instructions diff --git a/lib/Target/X86/X86InstrTSX.td b/lib/Target/X86/X86InstrTSX.td index a37a8cc744..363a190aa8 100644 --- a/lib/Target/X86/X86InstrTSX.td +++ b/lib/Target/X86/X86InstrTSX.td @@ -15,6 +15,9 @@ //===----------------------------------------------------------------------===// // TSX instructions +def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>, + [SDNPHasChain, SDNPSideEffect]>; + let usesCustomInserter = 1 in def XBEGIN : I<0, Pseudo, (outs GR32:$dst), (ins), "# XBEGIN", [(set GR32:$dst, (int_x86_xbegin))]>, @@ -27,6 +30,10 @@ def XBEGIN_4 : Ii32PCRel<0xc7, MRM_F8, (outs), (ins brtarget:$dst), def XEND : I<0x01, MRM_D5, (outs), (ins), "xend", [(int_x86_xend)]>, TB, Requires<[HasRTM]>; +let Defs = [EFLAGS] in +def XTEST : I<0x01, MRM_D6, (outs), (ins), + "xtest", [(set EFLAGS, (X86xtest))]>, TB, Requires<[HasTSX]>; + def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm), "xabort\t$imm", [(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3af1b3e06b..a8a9fd8acc 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -407,6 +407,57 @@ ReSimplify: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; + // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B + // if one of the registers is extended, but other isn't. + case X86::VMOVAPDrr: + case X86::VMOVAPDYrr: + case X86::VMOVAPSrr: + case X86::VMOVAPSYrr: + case X86::VMOVDQArr: + case X86::VMOVDQAYrr: + case X86::VMOVDQUrr: + case X86::VMOVDQUYrr: + case X86::VMOVUPDrr: + case X86::VMOVUPDYrr: + case X86::VMOVUPSrr: + case X86::VMOVUPSYrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + case X86::VMOVSDrr: + case X86::VMOVSSrr: { + if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) && + X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) { + unsigned NewOpc; + switch (OutMI.getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break; + case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break; + } + OutMI.setOpcode(NewOpc); + } + break; + } + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register // inputs modeled as normal uses instead of implicit uses. As such, truncate // off all but the first operand (the callee). FIXME: Change isel. diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td new file mode 100644 index 0000000000..b3eb460d3c --- /dev/null +++ b/lib/Target/X86/X86SchedHaswell.td @@ -0,0 +1,126 @@ +//=- X86SchedHaswell.td - X86 Haswell Scheduling -------------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Haswell to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def HaswellModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and HW can decode 4 + // instructions per cycle. + let IssueWidth = 4; + let MinLatency = 0; // 0 = Out-of-order execution. + let LoadLatency = 4; + let ILPWindow = 40; + let MispredictPenalty = 16; +} + +let SchedModel = HaswellModel in { + +// Haswell can issue micro-ops to 8 different ports in one cycle. + +// Ports 0, 1, 5, 6 and 7 handle all computation. +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. +// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. Port 7 can handle address calculations. +def HWPort0 : ProcResource<1>; +def HWPort1 : ProcResource<1>; +def HWPort2 : ProcResource<1>; +def HWPort3 : ProcResource<1>; +def HWPort4 : ProcResource<1>; +def HWPort5 : ProcResource<1>; +def HWPort6 : ProcResource<1>; +def HWPort7 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def HWPort23 : ProcResGroup<[HWPort2, HWPort3]>; +def HWPort237 : ProcResGroup<[HWPort2, HWPort3, HWPort7]>; +def HWPort05 : ProcResGroup<[HWPort0, HWPort5]>; +def HWPort056 : ProcResGroup<[HWPort0, HWPort5, HWPort6]>; +def HWPort15 : ProcResGroup<[HWPort1, HWPort5]>; +def HWPort015 : ProcResGroup<[HWPort0, HWPort1, HWPort5]>; +def HWPort0156: ProcResGroup<[HWPort0, HWPort1, HWPort5, HWPort6]>; + +// Integer division issued on port 0, but uses the non-pipelined divider. +def HWDivider : ProcResource<1> { let Buffered = 0; } + +// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 +// cycles after the memory operand. +def : ReadAdvance<ReadAfterLd, 4>; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW, + ProcResourceKind ExePort, + int Lat> { + // Register variant is using a single cycle on ExePort. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the + // latency. + def : WriteRes<SchedRW.Folded, [HWPort23, ExePort]> { + let Latency = !add(Lat, 4); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes<WriteRMW, [HWPort4]>; + +def : WriteRes<WriteStore, [HWPort237, HWPort4]>; +def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 4; } +def : WriteRes<WriteMove, [HWPort0156]>; +def : WriteRes<WriteZero, []>; + +defm : HWWriteResPair<WriteALU, HWPort0156, 1>; +defm : HWWriteResPair<WriteIMul, HWPort1, 3>; +defm : HWWriteResPair<WriteShift, HWPort056, 1>; +defm : HWWriteResPair<WriteJump, HWPort5, 1>; + +// This is for simple LEAs with one or two input operands. +// The complex ones can only execute on port 1, and they require two cycles on +// the port to read all inputs. We don't model that. +def : WriteRes<WriteLEA, [HWPort15]>; + +// This is quite rough, latency depends on the dividend. +def : WriteRes<WriteIDiv, [HWPort0, HWDivider]> { + let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes<WriteIDivLd, [HWPort23, HWPort0, HWDivider]> { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +// Scalar and vector floating point. +defm : HWWriteResPair<WriteFAdd, HWPort1, 3>; +defm : HWWriteResPair<WriteFMul, HWPort0, 5>; +defm : HWWriteResPair<WriteFDiv, HWPort0, 12>; // 10-14 cycles. +defm : HWWriteResPair<WriteFRcp, HWPort0, 5>; +defm : HWWriteResPair<WriteFSqrt, HWPort0, 15>; +defm : HWWriteResPair<WriteCvtF2I, HWPort1, 3>; +defm : HWWriteResPair<WriteCvtI2F, HWPort1, 4>; +defm : HWWriteResPair<WriteCvtF2F, HWPort1, 3>; + +// Vector integer operations. +defm : HWWriteResPair<WriteVecShift, HWPort05, 1>; +defm : HWWriteResPair<WriteVecLogic, HWPort015, 1>; +defm : HWWriteResPair<WriteVecALU, HWPort15, 1>; +defm : HWWriteResPair<WriteVecIMul, HWPort0, 5>; +defm : HWWriteResPair<WriteShuffle, HWPort15, 1>; + +def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; } +def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; } +} // SchedModel diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td new file mode 100644 index 0000000000..66d78e4fc4 --- /dev/null +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -0,0 +1,122 @@ +//=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for Sandy Bridge to support instruction +// scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def SandyBridgeModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and SB can decode 4 + // instructions per cycle. + // FIXME: Identify instructions that aren't a single fused micro-op. + let IssueWidth = 4; + let MinLatency = 0; // 0 = Out-of-order execution. + let LoadLatency = 4; + let ILPWindow = 30; + let MispredictPenalty = 16; +} + +let SchedModel = SandyBridgeModel in { + +// Sandy Bridge can issue micro-ops to 6 different ports in one cycle. + +// Ports 0, 1, and 5 handle all computation. +def SBPort0 : ProcResource<1>; +def SBPort1 : ProcResource<1>; +def SBPort5 : ProcResource<1>; + +// Ports 2 and 3 are identical. They handle loads and the address half of +// stores. +def SBPort23 : ProcResource<2>; + +// Port 4 gets the data half of stores. Store data can be available later than +// the store address, but since we don't model the latency of stores, we can +// ignore that. +def SBPort4 : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; +def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; +def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; + +// Integer division issued on port 0, but uses the non-pipelined divider. +def SBDivider : ProcResource<1> { let Buffered = 0; } + +// Loads are 4 cycles, so ReadAfterLd registers needn't be available until 4 +// cycles after the memory operand. +def : ReadAdvance<ReadAfterLd, 4>; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW, + ProcResourceKind ExePort, + int Lat> { + // Register variant is using a single cycle on ExePort. + def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; } + + // Memory variant also uses a cycle on port 2/3 and adds 4 cycles to the + // latency. + def : WriteRes<SchedRW.Folded, [SBPort23, ExePort]> { + let Latency = !add(Lat, 4); + } +} + +// A folded store needs a cycle on port 4 for the store data, but it does not +// need an extra port 2/3 cycle to recompute the address. +def : WriteRes<WriteRMW, [SBPort4]>; + +def : WriteRes<WriteStore, [SBPort23, SBPort4]>; +def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 4; } +def : WriteRes<WriteMove, [SBPort015]>; +def : WriteRes<WriteZero, []>; + +defm : SBWriteResPair<WriteALU, SBPort015, 1>; +defm : SBWriteResPair<WriteIMul, SBPort1, 3>; +defm : SBWriteResPair<WriteShift, SBPort05, 1>; +defm : SBWriteResPair<WriteJump, SBPort5, 1>; + +// This is for simple LEAs with one or two input operands. +// The complex ones can only execute on port 1, and they require two cycles on +// the port to read all inputs. We don't model that. +def : WriteRes<WriteLEA, [SBPort15]>; + +// This is quite rough, latency depends on the dividend. +def : WriteRes<WriteIDiv, [SBPort0, SBDivider]> { + let Latency = 25; + let ResourceCycles = [1, 10]; +} +def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> { + let Latency = 29; + let ResourceCycles = [1, 1, 10]; +} + +// Scalar and vector floating point. +defm : SBWriteResPair<WriteFAdd, SBPort1, 3>; +defm : SBWriteResPair<WriteFMul, SBPort0, 5>; +defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles. +defm : SBWriteResPair<WriteFRcp, SBPort0, 5>; +defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>; +defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>; +defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>; +defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>; + +// Vector integer operations. +defm : SBWriteResPair<WriteVecShift, SBPort05, 1>; +defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>; +defm : SBWriteResPair<WriteVecALU, SBPort15, 1>; +defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>; +defm : SBWriteResPair<WriteShuffle, SBPort15, 1>; + +def : WriteRes<WriteSystem, [SBPort015]> { let Latency = 100; } +def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; } +} // SchedModel diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td index d99d085298..9fbde88b71 100644 --- a/lib/Target/X86/X86Schedule.td +++ b/lib/Target/X86/X86Schedule.td @@ -7,9 +7,94 @@ // //===----------------------------------------------------------------------===// +// InstrSchedModel annotations for out-of-order CPUs. +// +// These annotations are independent of the itinerary classes defined below. + +// Instructions with folded loads need to read the memory operand immediately, +// but other register operands don't have to be read until the load is ready. +// These operands are marked with ReadAfterLd. +def ReadAfterLd : SchedRead; + +// Instructions with both a load and a store folded are modeled as a folded +// load + WriteRMW. +def WriteRMW : SchedWrite; + +// Most instructions can fold loads, so almost every SchedWrite comes in two +// variants: With and without a folded load. +// An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite +// with a folded load. +class X86FoldableSchedWrite : SchedWrite { + // The SchedWrite to use when a load is folded into the instruction. + SchedWrite Folded; +} + +// Multiclass that produces a linked pair of SchedWrites. +multiclass X86SchedWritePair { + // Register-Memory operation. + def Ld : SchedWrite; + // Register-Register operation. + def NAME : X86FoldableSchedWrite { + let Folded = !cast<SchedWrite>(NAME#"Ld"); + } +} + +// Arithmetic. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIDiv : X86SchedWritePair; // Integer division. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. + +// Integer shifts and rotates. +defm WriteShift : X86SchedWritePair; + +// Loads, stores, and moves, not folded with other operations. +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteMove : SchedWrite; + +// Idioms that clear a register, like xorps %xmm0, %xmm0. +// These can often bypass execution ports completely. +def WriteZero : SchedWrite; + +// Branches don't produce values, so they have no latency, but they still +// consume resources. Indirect branches can fold loads. +defm WriteJump : X86SchedWritePair; + +// Floating point. This covers both scalar and vector operations. +defm WriteFAdd : X86SchedWritePair; // Floating point add/sub/compare. +defm WriteFMul : X86SchedWritePair; // Floating point multiplication. +defm WriteFDiv : X86SchedWritePair; // Floating point division. +defm WriteFSqrt : X86SchedWritePair; // Floating point square root. +defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal. +defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. + +// FMA Scheduling helper class. +class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } + +// Vector integer operations. +defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. +defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. +defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. + +// Vector bitwise operations. +// These are often used on both floating point and integer vectors. +defm WriteVecLogic : X86SchedWritePair; // Vector and/or/xor. +defm WriteShuffle : X86SchedWritePair; // Vector shuffles and blends. + +// Conversion between integer and float. +defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. +defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. +defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. + +// Catch-all for expensive system instructions. +def WriteSystem : SchedWrite; + +// Old microcoded instructions that nobody use. +def WriteMicrocoded : SchedWrite; + //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for X86 -def IIC_DEFAULT : InstrItinClass; def IIC_ALU_MEM : InstrItinClass; def IIC_ALU_NONMEM : InstrItinClass; def IIC_LEA : InstrItinClass; @@ -484,3 +569,5 @@ def GenericModel : SchedMachineModel { } include "X86ScheduleAtom.td" +include "X86SchedSandyBridge.td" +include "X86SchedHaswell.td" diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index 1e5f2d6c9a..cce8f1b114 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -33,7 +33,6 @@ def AtomItineraries : ProcessorItineraries< // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >, // // Default is 1 cycle, port0 or port1 - InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >, InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >, InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 0f2c008ab9..4132463ee8 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -283,6 +283,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasLZCNT = true; ToggleFeature(X86::FeatureLZCNT); } + if (IsIntel && ((ECX >> 8) & 0x1)) { + HasPRFCHW = true; + ToggleFeature(X86::FeaturePRFCHW); + } if (IsAMD) { if ((ECX >> 6) & 0x1) { HasSSE4A = true; @@ -310,6 +314,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasBMI = true; ToggleFeature(X86::FeatureBMI); } + if ((EBX >> 4) & 0x1) { + HasHLE = true; + ToggleFeature(X86::FeatureHLE); + } if (IsIntel && ((EBX >> 5) & 0x1)) { X86SSELevel = AVX2; ToggleFeature(X86::FeatureAVX2); @@ -322,6 +330,14 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasRTM = true; ToggleFeature(X86::FeatureRTM); } + if (IsIntel && ((EBX >> 19) & 0x1)) { + HasADX = true; + ToggleFeature(X86::FeatureADX); + } + if (IsIntel && ((EBX >> 18) & 0x1)) { + HasRDSEED = true; + ToggleFeature(X86::FeatureRDSEED); + } } } } @@ -439,7 +455,10 @@ void X86Subtarget::initializeEnvironment() { HasBMI = false; HasBMI2 = false; HasRTM = false; + HasHLE = false; HasADX = false; + HasPRFCHW = false; + HasRDSEED = false; IsBTMemSlow = false; IsUAMemFast = false; HasVectorUAMem = false; @@ -448,6 +467,7 @@ void X86Subtarget::initializeEnvironment() { HasSlowDivide = false; PostRAScheduler = false; PadShortFunctions = false; + CallRegIndirect = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e97da4b6f4..6fbdb1d5f0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -121,9 +121,18 @@ protected: /// HasRTM - Processor has RTM instructions. bool HasRTM; + /// HasHLE - Processor has HLE. + bool HasHLE; + /// HasADX - Processor has ADX instructions. bool HasADX; + /// HasPRFCHW - Processor has PRFCHW instructions. + bool HasPRFCHW; + + /// HasRDSEED - Processor has RDSEED instructions. + bool HasRDSEED; + /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; @@ -153,6 +162,10 @@ protected: /// a stall when returning too early. bool PadShortFunctions; + /// CallRegIndirect - True if the Calls with memory reference should be converted + /// to a register-based indirect call. + bool CallRegIndirect; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -253,7 +266,10 @@ public: bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } bool hasRTM() const { return HasRTM; } + bool hasHLE() const { return HasHLE; } bool hasADX() const { return HasADX; } + bool hasPRFCHW() const { return HasPRFCHW; } + bool hasRDSEED() const { return HasRDSEED; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } @@ -261,6 +277,7 @@ public: bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } bool padShortFunctions() const { return PadShortFunctions; } + bool callRegIndirect() const { return CallRegIndirect; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index be2a997b8e..2336035bea 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -169,6 +169,29 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + static const CostTblEntry<MVT> AVX2CostTable[] = { + // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to + // customize them to detect the cases where shift amount is a scalar one. + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 1 }, + { ISD::SRA, MVT::v4i32, 1 }, + { ISD::SHL, MVT::v8i32, 1 }, + { ISD::SRL, MVT::v8i32, 1 }, + { ISD::SRA, MVT::v8i32, 1 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 1 }, + { ISD::SHL, MVT::v4i64, 1 }, + { ISD::SRL, MVT::v4i64, 1 }, + }; + + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable), + ISD, LT.second); + if (Idx != -1) + return LT.first * AVX2CostTable[Idx].Cost; + } + static const CostTblEntry<MVT> AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. @@ -248,17 +271,40 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, - { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, - { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, + + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 8 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i8, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i16, 3 }, + { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 5 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 5 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 9 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 6 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, + { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, }; diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 019c4570d9..6b6480e4b4 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -409,7 +409,7 @@ XCoreFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } if (RegInfo->requiresRegisterScavenging(MF)) { // Reserve a slot close to SP or frame pointer. - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index a75212a386..bc5109b4d4 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1,4 +1,4 @@ -//===- FunctionAttrs.cpp - Pass which marks functions readnone or readonly ===// +//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===// // // The LLVM Compiler Infrastructure // @@ -14,6 +14,8 @@ // to the function does not create any copies of the pointer value that // outlive the call. This more or less means that the pointer is only // dereferenced, and not returned from the function or stored in a global. +// Finally, well-known library call declarations are marked with all +// attributes that are consistent with the function's standard definition. // This pass is implemented as a bottom-up traversal of the call-graph. // //===----------------------------------------------------------------------===// @@ -32,12 +34,14 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); +STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { @@ -62,14 +66,63 @@ namespace { // AddNoAliasAttrs - Deduce noalias attributes for the SCC. bool AddNoAliasAttrs(const CallGraphSCC &SCC); + // Utility methods used by inferPrototypeAttributes to add attributes + // and maintain annotation statistics. + + void setDoesNotAccessMemory(Function &F) { + if (!F.doesNotAccessMemory()) { + F.setDoesNotAccessMemory(); + ++NumAnnotated; + } + } + + void setOnlyReadsMemory(Function &F) { + if (!F.onlyReadsMemory()) { + F.setOnlyReadsMemory(); + ++NumAnnotated; + } + } + + void setDoesNotThrow(Function &F) { + if (!F.doesNotThrow()) { + F.setDoesNotThrow(); + ++NumAnnotated; + } + } + + void setDoesNotCapture(Function &F, unsigned n) { + if (!F.doesNotCapture(n)) { + F.setDoesNotCapture(n); + ++NumAnnotated; + } + } + + void setDoesNotAlias(Function &F, unsigned n) { + if (!F.doesNotAlias(n)) { + F.setDoesNotAlias(n); + ++NumAnnotated; + } + } + + // inferPrototypeAttributes - Analyze the name and prototype of the + // given function and set any applicable attributes. Returns true + // if any attributes were set and false otherwise. + bool inferPrototypeAttributes(Function &F); + + // annotateLibraryCalls - Adds attributes to well-known standard library + // call declarations. + bool annotateLibraryCalls(const CallGraphSCC &SCC); + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); + AU.addRequired<TargetLibraryInfo>(); CallGraphSCCPass::getAnalysisUsage(AU); } private: AliasAnalysis *AA; + TargetLibraryInfo *TLI; }; } @@ -77,6 +130,7 @@ char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", "Deduce function attributes", false, false) @@ -598,10 +652,693 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } +/// inferPrototypeAttributes - Analyze the name and prototype of the +/// given function and set any applicable attributes. Returns true +/// if any attributes were set and false otherwise. +bool FunctionAttrs::inferPrototypeAttributes(Function &F) { + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) + return false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strcmp: + case LibFunc::strspn: + case LibFunc::strncmp: + case LibFunc::strcspn: + case LibFunc::strcoll: + case LibFunc::strcasecmp: + case LibFunc::strncasecmp: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::scanf: + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat: + case LibFunc::sscanf: + case LibFunc::sprintf: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + setDoesNotCapture(F, 3); + break; + case LibFunc::system: + if (FTy->getNumParams() != 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + setOnlyReadsMemory(F); + setDoesNotThrow(F); + break; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotAlias(F, 0); + break; + case LibFunc::mkdir: + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::read: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::rmdir: + case LibFunc::rewind: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::rename: + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + setDoesNotCapture(F, 2); + break; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::chmod: + case LibFunc::chown: + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 2); + break; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setOnlyReadsMemory(F); + break; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 3); + case LibFunc::fread: + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 4); + case LibFunc::fputs: + case LibFunc::fscanf: + case LibFunc::fprintf: + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setOnlyReadsMemory(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::gets: + case LibFunc::getchar: + setDoesNotThrow(F); + break; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::uname: + case LibFunc::unlink: + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::pread: + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; these are valid pthread cancellation points. + setDoesNotCapture(F, 2); + break; + case LibFunc::putchar: + setDoesNotThrow(F); + break; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vsscanf: + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 3); + break; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + setDoesNotThrow(F); + setDoesNotAccessMemory(F); + break; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + setDoesNotCapture(F, 4); + break; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + break; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || + !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + setDoesNotCapture(F, 1); + setDoesNotCapture(F, 2); + break; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 1); + break; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotAlias(F, 0); + break; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + setDoesNotThrow(F); + setDoesNotCapture(F, 2); + break; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + setDoesNotCapture(F, 1); + break; + default: + // Didn't mark any attributes. + return false; + } + + return true; +} + +/// annotateLibraryCalls - Adds attributes to well-known standard library +/// call declarations. +bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { + bool MadeChange = false; + + // Check each function in turn annotating well-known library function + // declarations with attributes. + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { + Function *F = (*I)->getFunction(); + + if (F != 0 && F->isDeclaration()) + MadeChange |= inferPrototypeAttributes(*F); + } + + return MadeChange; +} + bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) { AA = &getAnalysis<AliasAnalysis>(); + TLI = &getAnalysis<TargetLibraryInfo>(); - bool Changed = AddReadAttrs(SCC); + bool Changed = annotateLibraryCalls(SCC); + Changed |= AddReadAttrs(SCC); Changed |= AddNoCaptureAttrs(SCC); Changed |= AddNoAliasAttrs(SCC); return Changed; diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c6d60d6f00..7595da08d3 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -44,7 +44,7 @@ namespace { } void set(const APFloat& C); - + void negate(); bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } @@ -79,6 +79,14 @@ namespace { bool isInt() const { return !IsFp; } + // If the coefficient is represented by an integer, promote it to a + // floating point. + void convertToFpType(const fltSemantics &Sem); + + // Construct an APFloat from a signed integer. + // TODO: We should get rid of this function when APFloat can be constructed + // from an *SIGNED* integer. + APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val); private: bool IsFp; @@ -150,7 +158,9 @@ namespace { typedef SmallVector<const FAddend*, 4> AddendVect; Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); - + + Value *performFactorization(Instruction *I); + /// Convert given addend to a Value Value *createAddendVal(const FAddend &A, bool& NeedNeg); @@ -159,6 +169,7 @@ namespace { Value *createFSub(Value *Opnd0, Value *Opnd1); Value *createFAdd(Value *Opnd0, Value *Opnd1); Value *createFMul(Value *Opnd0, Value *Opnd1); + Value *createFDiv(Value *Opnd0, Value *Opnd1); Value *createFNeg(Value *V); Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); void createInstPostProc(Instruction *NewInst); @@ -203,7 +214,31 @@ void FAddendCoef::set(const APFloat& C) { IsFp = BufHasFpVal = true; } -void FAddendCoef::operator=(const FAddendCoef& That) { +void FAddendCoef::convertToFpType(const fltSemantics &Sem) { + if (!isInt()) + return; + + APFloat *P = getFpValPtr(); + if (IntVal > 0) + new(P) APFloat(Sem, IntVal); + else { + new(P) APFloat(Sem, 0 - IntVal); + P->changeSign(); + } + IsFp = BufHasFpVal = true; +} + +APFloat FAddendCoef::createAPFloatFromInt(const fltSemantics &Sem, int Val) { + if (Val >= 0) + return APFloat(Sem, Val); + + APFloat T(Sem, 0 - Val); + T.changeSign(); + + return T; +} + +void FAddendCoef::operator=(const FAddendCoef &That) { if (That.isInt()) set(That.IntVal); else @@ -222,13 +257,13 @@ void FAddendCoef::operator+=(const FAddendCoef &That) { if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().add(T, RndMode); return; } APFloat &T = getFpVal(); - T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); + T.add(createAPFloatFromInt(T.getSemantics(), That.IntVal), RndMode); } void FAddendCoef::operator-=(const FAddendCoef &That) { @@ -243,13 +278,13 @@ void FAddendCoef::operator-=(const FAddendCoef &That) { if (isInt()) { const APFloat &T = That.getFpVal(); - set(T); - getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); + convertToFpType(T.getSemantics()); + getFpVal().subtract(T, RndMode); return; } APFloat &T = getFpVal(); - T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); + T.subtract(createAPFloatFromInt(T.getSemantics(), IntVal), RndMode); } void FAddendCoef::operator*=(const FAddendCoef &That) { @@ -272,11 +307,12 @@ void FAddendCoef::operator*=(const FAddendCoef &That) { isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); if (isInt()) - set(APFloat(Semantic, IntVal)); + convertToFpType(Semantic); APFloat &F0 = getFpVal(); if (That.isInt()) - F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); + F0.multiply(createAPFloatFromInt(Semantic, That.IntVal), + APFloat::rmNearestTiesToEven); else F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); @@ -388,6 +424,78 @@ unsigned FAddend::drillAddendDownOneStep return BreakNum; } +// Try to perform following optimization on the input instruction I. Return the +// simplified expression if was successful; otherwise, return 0. +// +// Instruction "I" is Simplified into +// ------------------------------------------------------- +// (x * y) +/- (x * z) x * (y +/- z) +// (y / x) +/- (z / x) (y +/- z) / x +// +Value *FAddCombine::performFactorization(Instruction *I) { + assert((I->getOpcode() == Instruction::FAdd || + I->getOpcode() == Instruction::FSub) && "Expect add/sub"); + + Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); + Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); + + if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) + return 0; + + bool isMpy = false; + if (I0->getOpcode() == Instruction::FMul) + isMpy = true; + else if (I0->getOpcode() != Instruction::FDiv) + return 0; + + Value *Opnd0_0 = I0->getOperand(0); + Value *Opnd0_1 = I0->getOperand(1); + Value *Opnd1_0 = I1->getOperand(0); + Value *Opnd1_1 = I1->getOperand(1); + + // Input Instr I Factor AddSub0 AddSub1 + // ---------------------------------------------- + // (x*y) +/- (x*z) x y z + // (y/x) +/- (z/x) x y z + // + Value *Factor = 0; + Value *AddSub0 = 0, *AddSub1 = 0; + + if (isMpy) { + if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) + Factor = Opnd0_0; + else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) + Factor = Opnd0_1; + + if (Factor) { + AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; + AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; + } + } else if (Opnd0_1 == Opnd1_1) { + Factor = Opnd0_1; + AddSub0 = Opnd0_0; + AddSub1 = Opnd1_0; + } + + if (!Factor) + return 0; + + // Create expression "NewAddSub = AddSub0 +/- AddsSub1" + Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? + createFAdd(AddSub0, AddSub1) : + createFSub(AddSub0, AddSub1); + if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { + const APFloat &F = CFP->getValueAPF(); + if (!F.isNormal() || F.isDenormal()) + return 0; + } + + if (isMpy) + return createFMul(Factor, NewAddSub); + + return createFDiv(NewAddSub, Factor); +} + Value *FAddCombine::simplify(Instruction *I) { assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); @@ -471,7 +579,8 @@ Value *FAddCombine::simplify(Instruction *I) { return R; } - return 0; + // step 6: Try factorization as the last resort, + return performFactorization(I); } Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { @@ -627,7 +736,8 @@ Value *FAddCombine::createNaryFAdd Value *FAddCombine::createFSub (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFSub(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } @@ -639,13 +749,22 @@ Value *FAddCombine::createFNeg(Value *V) { Value *FAddCombine::createFAdd (Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFAdd(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { Value *V = Builder->CreateFMul(Opnd0, Opnd1); - createInstPostProc(cast<Instruction>(V)); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); + return V; +} + +Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { + Value *V = Builder->CreateFDiv(Opnd0, Opnd1); + if (Instruction *I = dyn_cast<Instruction>(V)) + createInstPostProc(I); return V; } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index d162223a6f..2ee1278d23 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1610,6 +1610,9 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ + // We need to know the target byte order to perform this optimization. + if (!IC.getDataLayout()) return 0; + Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); @@ -1631,7 +1634,10 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); } - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + unsigned Elt = 0; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } @@ -1653,6 +1659,8 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } unsigned Elt = ShAmt->getZExtValue() / DestWidth; + if (IC.getDataLayout()->isBigEndian()) + Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index bad46b4dab..a96e754f3d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -139,6 +139,31 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS, } } +/// Returns true if the exploded icmp can be expressed as a signed comparison +/// to zero and updates the predicate accordingly. +/// The signedness of the comparison is preserved. +static bool isSignTest(ICmpInst::Predicate &pred, const ConstantInt *RHS) { + if (!ICmpInst::isSigned(pred)) + return false; + + if (RHS->isZero()) + return ICmpInst::isRelational(pred); + + if (RHS->isOne()) { + if (pred == ICmpInst::ICMP_SLT) { + pred = ICmpInst::ICMP_SLE; + return true; + } + } else if (RHS->isAllOnesValue()) { + if (pred == ICmpInst::ICMP_SGT) { + pred = ICmpInst::ICMP_SGE; + return true; + } + } + + return false; +} + // isHighOnes - Return true if the constant is of the form 1+0+. // This is the same as lowones(~X). static bool isHighOnes(const ConstantInt *CI) { @@ -443,20 +468,29 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } - // If a 32-bit or 64-bit magic bitvector captures the entire comparison state + // If a magic bitvector captures the entire comparison state // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 - if (ArrayElementCount <= 32 || - (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) { - Type *Ty; - if (ArrayElementCount <= 32) + { + Type *Ty = 0; + + // Look for an appropriate type: + // - The type of Idx if the magic fits + // - The smallest fitting legal type if we have a DataLayout + // - Default to i32 + if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth()) + Ty = Idx->getType(); + else if (TD) + Ty = TD->getSmallestLegalIntType(Init->getContext(), ArrayElementCount); + else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - else - Ty = Type::getInt64Ty(Init->getContext()); - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); - return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + + if (Ty != 0) { + Value *V = Builder->CreateIntCast(Idx, Ty, false); + V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); + } } return 0; @@ -1273,6 +1307,23 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, break; } + case Instruction::Mul: { // (icmp pred (mul X, Val), CI) + ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1)); + if (!Val) break; + + // If this is a signed comparison to 0 and the mul is sign preserving, + // use the mul LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && !Val->isZero() && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(Val->isNegative() ? + ICmpInst::getSwappedPredicate(pred) : pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + + break; + } + case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI) ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1)); if (!ShAmt) break; @@ -1304,6 +1355,12 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), ConstantExpr::getLShr(RHS, ShAmt)); + // If the shift is NSW and we compare to 0, then it is just shifting out + // sign bits, no need for an AND either. + if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0) + return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0), + ConstantExpr::getLShr(RHS, ShAmt)); + if (LHSI->hasOneUse()) { // Otherwise strength reduce the shift into an and. uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); @@ -1318,6 +1375,15 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } + // If this is a signed comparison to 0 and the shift is sign preserving, + // use the shift LHS operand instead. + ICmpInst::Predicate pred = ICI.getPredicate(); + if (isSignTest(pred, RHS) && + cast<BinaryOperator>(LHSI)->hasNoSignedWrap()) + return new ICmpInst(pred, + LHSI->getOperand(0), + Constant::getNullValue(RHS->getType())); + // Otherwise, if this is a comparison of the sign bit, simplify to and/test. bool TrueIfSigned = false; if (LHSI->hasOneUse() && @@ -1333,13 +1399,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } // Transform (icmp pred iM (shl iM %v, N), CI) - // -> (icmp pred i(M-N) (trunc %v iM to i(N-N)), (trunc (CI>>N)) - // Transform the shl to a trunc if (trunc (CI>>N)) has no loss. + // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N)) + // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N. // This enables to get rid of the shift in favor of a trunc which can be // free on the target. It has the additional benefit of comparing to a // smaller constant, which will be target friendly. unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); - if (Amt != 0 && RHSV.countTrailingZeros() >= Amt) { + if (LHSI->hasOneUse() && + Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( ConstantExpr::getAShr(RHS, @@ -1531,6 +1598,19 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, return new ICmpInst(pred, X, NegX); } } + break; + case Instruction::Mul: + if (RHSV == 0 && BO->hasNoSignedWrap()) { + if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) { + // The trivial case (mul X, 0) is handled by InstSimplify + // General case : (mul X, C) != 0 iff X != 0 + // (mul X, C) == 0 iff X == 0 + if (!BOC->isZero()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), + Constant::getNullValue(RHS->getType())); + } + } + break; default: break; } } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) { diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index a262d711d3..121aa1f8d7 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -127,13 +127,14 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // If this is a non-volatile load or a cast from the same type, // merge. if (TI->isCast()) { - if (TI->getOperand(0)->getType() != FI->getOperand(0)->getType()) + Type *FIOpndTy = FI->getOperand(0)->getType(); + if (TI->getOperand(0)->getType() != FIOpndTy) return 0; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); - if (CondTy->isVectorTy() && CondTy->getVectorNumElements() != - FI->getOperand(0)->getType()->getVectorNumElements()) + if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || + CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) return 0; } else { return 0; // unknown unary op. diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 6877475b1d..623c470506 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -71,7 +71,7 @@ static const char *kAsanRegisterGlobalsName = "__asan_register_globals"; static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *kAsanInitName = "__asan_init_v1"; +static const char *kAsanInitName = "__asan_init_v3"; static const char *kAsanHandleNoReturnName = "__asan_handle_no_return"; static const char *kAsanMappingOffsetName = "__asan_mapping_offset"; static const char *kAsanMappingScaleName = "__asan_mapping_scale"; @@ -244,7 +244,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = false, + AddressSanitizer(bool CheckInitOrder = true, bool CheckUseAfterReturn = false, bool CheckLifetime = false, StringRef BlacklistFile = StringRef(), @@ -274,8 +274,6 @@ struct AddressSanitizer : public FunctionPass { Instruction *InsertBefore, bool IsWrite); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F); - void createInitializerPoisonCalls(Module &M, - Value *FirstAddr, Value *LastAddr); bool maybeInsertAsanInitAtFunctionEntry(Function &F); void emitShadowMapping(Module &M, IRBuilder<> &IRB) const; virtual bool doInitialization(Module &M); @@ -315,7 +313,7 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = false, + AddressSanitizerModule(bool CheckInitOrder = true, StringRef BlacklistFile = StringRef(), bool ZeroBaseShadow = false) : ModulePass(ID), @@ -333,8 +331,7 @@ class AddressSanitizerModule : public ModulePass { void initializeCallbacks(Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); - void createInitializerPoisonCalls(Module &M, Value *FirstAddr, - Value *LastAddr); + void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); size_t RedzoneSize() const { return RedzoneSizeForScale(Mapping.Scale); } @@ -531,9 +528,12 @@ static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { // Create a constant for Str so that we can pass it to the run-time lib. static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) { Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str); - return new GlobalVariable(M, StrConst->getType(), true, + GlobalVariable *GV = new GlobalVariable(M, StrConst->getType(), true, GlobalValue::PrivateLinkage, StrConst, kAsanGenPrefix); + GV->setUnnamedAddr(true); // Ok to merge these. + GV->setAlignment(1); // Strings may not be merged w/o setting align 1. + return GV; } static bool GlobalWasGeneratedByAsan(GlobalVariable *G) { @@ -750,7 +750,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, } void AddressSanitizerModule::createInitializerPoisonCalls( - Module &M, Value *FirstAddr, Value *LastAddr) { + Module &M, GlobalValue *ModuleName) { // We do all of our poisoning and unpoisoning within _GLOBAL__I_a. Function *GlobalInit = M.getFunction("_GLOBAL__I_a"); // If that function is not present, this TU contains no globals, or they have @@ -762,7 +762,8 @@ void AddressSanitizerModule::createInitializerPoisonCalls( IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. - IRB.CreateCall2(AsanPoisonGlobals, FirstAddr, LastAddr); + Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); + IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); // Add calls to unpoison all globals before each return instruction. for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); @@ -836,7 +837,7 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Declare our poisoning and unpoisoning functions. AsanPoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( - kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL)); + kAsanPoisonGlobalsName, IRB.getVoidTy(), IntptrTy, NULL)); AsanPoisonGlobals->setLinkage(Function::ExternalLinkage); AsanUnpoisonGlobals = checkInterfaceFunction(M.getOrInsertFunction( kAsanUnpoisonGlobalsName, IRB.getVoidTy(), NULL)); @@ -885,11 +886,12 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // size_t size; // size_t size_with_redzone; // const char *name; + // const char *module_name; // size_t has_dynamic_init; // We initialize an array of such structures and pass it to a run-time call. StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, - IntptrTy, NULL); + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n), DynamicInit; @@ -897,9 +899,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) { assert(CtorFunc); IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - // The addresses of the first and last dynamically initialized globals in - // this TU. Used in initialization order checking. - Value *FirstDynamic = 0, *LastDynamic = 0; + bool HasDynamicallyInitializedGlobals = false; + + GlobalVariable *ModuleName = createPrivateGlobalForString( + M, M.getModuleIdentifier()); + // We shouldn't merge same module names, as this string serves as unique + // module ID in runtime. + ModuleName->setUnnamedAddr(false); for (size_t i = 0; i < n; i++) { static const uint64_t kMaxGlobalRedzone = 1 << 18; @@ -930,11 +936,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewTy, G->getInitializer(), Constant::getNullValue(RightRedZoneTy), NULL); - SmallString<2048> DescriptionOfGlobal = G->getName(); - DescriptionOfGlobal += " ("; - DescriptionOfGlobal += M.getModuleIdentifier(); - DescriptionOfGlobal += ")"; - GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal); + GlobalVariable *Name = createPrivateGlobalForString(M, G->getName()); // Create a new global variable with enough space for a redzone. GlobalVariable *NewGlobal = new GlobalVariable( @@ -958,15 +960,13 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), + ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), NULL); // Populate the first and last globals declared in this TU. - if (CheckInitOrder && GlobalHasDynamicInitializer) { - LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy); - if (FirstDynamic == 0) - FirstDynamic = LastDynamic; - } + if (CheckInitOrder && GlobalHasDynamicInitializer) + HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); } @@ -977,8 +977,8 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); // Create calls for poisoning before initializers run and unpoisoning after. - if (CheckInitOrder && FirstDynamic && LastDynamic) - createInitializerPoisonCalls(M, FirstDynamic, LastDynamic); + if (CheckInitOrder && HasDynamicallyInitializedGlobals) + createInitializerPoisonCalls(M, ModuleName); IRB.CreateCall2(AsanRegisterGlobals, IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); @@ -1095,6 +1095,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { bool AddressSanitizer::runOnFunction(Function &F) { if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; + if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); initializeCallbacks(*F.getParent()); @@ -1312,10 +1313,10 @@ void FunctionStackPoisoner::poisonStack() { ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); } - // This string will be parsed by the run-time (DescribeStackAddress). + // This string will be parsed by the run-time (DescribeAddressIfStack). SmallString<2048> StackDescriptionStorage; raw_svector_ostream StackDescription(StackDescriptionStorage); - StackDescription << F.getName() << " " << AllocaVec.size() << " "; + StackDescription << AllocaVec.size() << " "; // Insert poison calls for lifetime intrinsics for alloca. bool HavePoisonedAllocas = false; @@ -1348,19 +1349,26 @@ void FunctionStackPoisoner::poisonStack() { } assert(Pos == LocalStackSize); - // Write the Magic value and the frame description constant to the redzone. + // The left-most redzone has enough space for at least 4 pointers. + // Write the Magic value to redzone[0]. Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy); IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic), BasePlus0); - Value *BasePlus1 = IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, - ASan.LongSize/8)); - BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy); + // Write the frame description constant to redzone[1]. + Value *BasePlus1 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, ASan.LongSize/8)), + IntptrPtrTy); GlobalVariable *StackDescriptionGlobal = createPrivateGlobalForString(*F.getParent(), StackDescription.str()); Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy); IRB.CreateStore(Description, BasePlus1); + // Write the PC to redzone[2]. + Value *BasePlus2 = IRB.CreateIntToPtr( + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, + 2 * ASan.LongSize/8)), + IntptrPtrTy); + IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2); // Poison the stack redzones at the entry. Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB); diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index a79873cbf6..2edd151869 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -29,8 +29,10 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugLoc.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/PathV2.h" #include "llvm/Support/raw_ostream.h" @@ -39,35 +41,57 @@ #include <utility> using namespace llvm; +static cl::opt<std::string> +DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, + cl::ValueRequired); + +GCOVOptions GCOVOptions::getDefault() { + GCOVOptions Options; + Options.EmitNotes = true; + Options.EmitData = true; + Options.UseCfgChecksum = false; + Options.NoRedZone = false; + Options.FunctionNamesInData = true; + + if (DefaultGCOVVersion.size() != 4) { + llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") + + DefaultGCOVVersion); + } + memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4); + return Options; +} + namespace { class GCOVProfiler : public ModulePass { public: static char ID; - GCOVProfiler() - : ModulePass(ID), EmitNotes(true), EmitData(true), - UseExtraChecksum(false), NoRedZone(false), - NoFunctionNamesInData(false) { - memcpy(Version, DefaultGCovVersion, 4); + GCOVProfiler() : ModulePass(ID), Options(GCOVOptions::getDefault()) { + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } - GCOVProfiler(bool EmitNotes, bool EmitData, const char (&Version)[4], - bool UseExtraChecksum, bool NoRedZone, - bool NoFunctionNamesInData) - : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData), - UseExtraChecksum(UseExtraChecksum), NoRedZone(NoRedZone), - NoFunctionNamesInData(NoFunctionNamesInData) { - memcpy(this->Version, Version, 4); - assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + GCOVProfiler(const GCOVOptions &Options) : ModulePass(ID), Options(Options){ + assert((Options.EmitNotes || Options.EmitData) && + "GCOVProfiler asked to do nothing?"); + ReversedVersion[0] = Options.Version[3]; + ReversedVersion[1] = Options.Version[2]; + ReversedVersion[2] = Options.Version[1]; + ReversedVersion[3] = Options.Version[0]; + ReversedVersion[4] = '\0'; initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); } virtual const char *getPassName() const { return "GCOV Profiler"; } + private: bool runOnModule(Module &M); - // Create the GCNO files for the Module based on DebugInfo. - void emitGCNO(); + // Create the .gcno files for the Module based on DebugInfo. + void emitProfileNotes(); // Modify the program to track transitions along edges and call into the // profiling runtime to emit .gcda files when run. @@ -78,6 +102,8 @@ namespace { Constant *getIncrementIndirectCounterFunc(); Constant *getEmitFunctionFunc(); Constant *getEmitArcsFunc(); + Constant *getDeleteWriteoutFunctionListFunc(); + Constant *getDeleteFlushFunctionListFunc(); Constant *getEndFileFunc(); // Create or retrieve an i32 state value that is used to represent the @@ -88,23 +114,22 @@ namespace { // block number. GlobalVariable *buildEdgeLookupTable(Function *F, GlobalVariable *Counter, - const UniqueVector<BasicBlock *> &Preds, - const UniqueVector<BasicBlock *> &Succs); + const UniqueVector<BasicBlock *>&Preds, + const UniqueVector<BasicBlock*>&Succs); // Add the function to write out all our counters to the global destructor // list. - void insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); + Function *insertCounterWriteout(ArrayRef<std::pair<GlobalVariable*, + MDNode*> >); + Function *insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); void insertIndirectCounterIncrement(); - void insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> >); std::string mangleName(DICompileUnit CU, const char *NewStem); - bool EmitNotes; - bool EmitData; - char Version[4]; - bool UseExtraChecksum; - bool NoRedZone; - bool NoFunctionNamesInData; + GCOVOptions Options; + + // Reversed, NUL-terminated copy of Options.Version. + char ReversedVersion[5]; Module *M; LLVMContext *Ctx; @@ -115,13 +140,14 @@ char GCOVProfiler::ID = 0; INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", "Insert instrumentation for GCOV profiling", false, false) -ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData, - const char (&Version)[4], - bool UseExtraChecksum, - bool NoRedZone, - bool NoFunctionNamesInData) { - return new GCOVProfiler(EmitNotes, EmitData, Version, UseExtraChecksum, - NoRedZone, NoFunctionNamesInData); +ModulePass *llvm::createGCOVProfilerPass(const GCOVOptions &Options) { + return new GCOVProfiler(Options); +} + +static std::string getFunctionName(DISubprogram SP) { + if (!SP.getLinkageName().empty()) + return SP.getLinkageName(); + return SP.getName(); } namespace { @@ -260,7 +286,7 @@ namespace { class GCOVFunction : public GCOVRecord { public: GCOVFunction(DISubprogram SP, raw_ostream *os, uint32_t Ident, - bool UseExtraChecksum) { + bool UseCfgChecksum) { this->os = os; Function *F = SP.getFunction(); @@ -272,16 +298,16 @@ namespace { ReturnBlock = new GCOVBlock(i++, os); writeBytes(FunctionTag, 4); - uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(getFunctionName(SP)) + 1 + lengthOfGCOVString(SP.getFilename()) + 1; - if (UseExtraChecksum) + if (UseCfgChecksum) ++BlockLen; write(BlockLen); write(Ident); write(0); // lineno checksum - if (UseExtraChecksum) + if (UseCfgChecksum) write(0); // cfg checksum - writeGCOVString(SP.getName()); + writeGCOVString(getFunctionName(SP)); writeGCOVString(SP.getFilename()); write(SP.getLineNumber()); } @@ -356,19 +382,23 @@ std::string GCOVProfiler::mangleName(DICompileUnit CU, const char *NewStem) { SmallString<128> Filename = CU.getFilename(); sys::path::replace_extension(Filename, NewStem); - return sys::path::filename(Filename.str()); + StringRef FName = sys::path::filename(Filename); + SmallString<128> CurPath; + if (sys::fs::current_path(CurPath)) return FName; + sys::path::append(CurPath, FName.str()); + return CurPath.str(); } bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); - if (EmitNotes) emitGCNO(); - if (EmitData) return emitProfileArcs(); + if (Options.EmitNotes) emitProfileNotes(); + if (Options.EmitData) return emitProfileArcs(); return false; } -void GCOVProfiler::emitGCNO() { +void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -382,7 +412,7 @@ void GCOVProfiler::emitGCNO() { raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo, raw_fd_ostream::F_Binary); out.write("oncg", 4); - out.write(Version, 4); + out.write(ReversedVersion, 4); out.write("MVLL", 4); DIArray SPs = CU.getSubprograms(); @@ -392,7 +422,7 @@ void GCOVProfiler::emitGCNO() { Function *F = SP.getFunction(); if (!F) continue; - GCOVFunction Func(SP, &out, i, UseExtraChecksum); + GCOVFunction Func(SP, &out, i, Options.UseCfgChecksum); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { GCOVBlock &Block = Func.getBlock(BB); @@ -522,8 +552,38 @@ bool GCOVProfiler::emitProfileArcs() { } } - insertCounterWriteout(CountersBySP); - insertFlush(CountersBySP); + Function *WriteoutF = insertCounterWriteout(CountersBySP); + Function *FlushF = insertFlush(CountersBySP); + + // Create a small bit of code that registers the "__llvm_gcov_writeout" to + // be executed at exit and the "__llvm_gcov_flush" function to be executed + // when "__gcov_flush" is called. + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, + "__llvm_gcov_init", M); + F->setUnnamedAddr(true); + F->setLinkage(GlobalValue::InternalLinkage); + F->addFnAttr(Attribute::NoInline); + if (Options.NoRedZone) + F->addFnAttr(Attribute::NoRedZone); + + BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", F); + IRBuilder<> Builder(BB); + + FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + Type *Params[] = { + PointerType::get(FTy, 0), + PointerType::get(FTy, 0) + }; + FTy = FunctionType::get(Builder.getVoidTy(), Params, false); + + // Inialize the environment and register the local writeout and flush + // functions. + Constant *GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); + Builder.CreateCall2(GCOVInit, WriteoutF, FlushF); + Builder.CreateRetVoid(); + + appendToGlobalCtors(*M, F, 0); } if (InsertIndCounterIncrCode) @@ -619,6 +679,16 @@ Constant *GCOVProfiler::getEmitArcsFunc() { return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); } +Constant *GCOVProfiler::getDeleteWriteoutFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_writeout_function_list", FTy); +} + +Constant *GCOVProfiler::getDeleteFlushFunctionListFunc() { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_delete_flush_function_list", FTy); +} + Constant *GCOVProfiler::getEndFileFunc() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); return M->getOrInsertFunction("llvm_gcda_end_file", FTy); @@ -637,7 +707,7 @@ GlobalVariable *GCOVProfiler::getEdgeStateValue() { return GV; } -void GCOVProfiler::insertCounterWriteout( +Function *GCOVProfiler::insertCounterWriteout( ArrayRef<std::pair<GlobalVariable *, MDNode *> > CountersBySP) { FunctionType *WriteoutFTy = FunctionType::get(Type::getVoidTy(*Ctx), false); Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); @@ -646,7 +716,7 @@ void GCOVProfiler::insertCounterWriteout( "__llvm_gcov_writeout", M); WriteoutF->setUnnamedAddr(true); WriteoutF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) WriteoutF->addFnAttr(Attribute::NoRedZone); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF); @@ -664,15 +734,15 @@ void GCOVProfiler::insertCounterWriteout( std::string FilenameGcda = mangleName(CU, "gcda"); Builder.CreateCall2(StartFile, Builder.CreateGlobalStringPtr(FilenameGcda), - Builder.CreateGlobalStringPtr(Version)); + Builder.CreateGlobalStringPtr(ReversedVersion)); for (unsigned j = 0, e = CountersBySP.size(); j != e; ++j) { DISubprogram SP(CountersBySP[j].second); - Builder.CreateCall3(EmitFunction, - Builder.getInt32(j), - NoFunctionNamesInData ? - Constant::getNullValue(Builder.getInt8PtrTy()) : - Builder.CreateGlobalStringPtr(SP.getName()), - Builder.getInt8(UseExtraChecksum)); + Builder.CreateCall3( + EmitFunction, Builder.getInt32(j), + Options.FunctionNamesInData ? + Builder.CreateGlobalStringPtr(getFunctionName(SP)) : + Constant::getNullValue(Builder.getInt8PtrTy()), + Builder.getInt8(Options.UseCfgChecksum)); GlobalVariable *GV = CountersBySP[j].first; unsigned Arcs = @@ -684,29 +754,9 @@ void GCOVProfiler::insertCounterWriteout( Builder.CreateCall(EndFile); } } - Builder.CreateRetVoid(); - // Create a small bit of code that registers the "__llvm_gcov_writeout" - // function to be executed at exit. - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *F = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_init", M); - F->setUnnamedAddr(true); - F->setLinkage(GlobalValue::InternalLinkage); - F->addFnAttr(Attribute::NoInline); - if (NoRedZone) - F->addFnAttr(Attribute::NoRedZone); - - BB = BasicBlock::Create(*Ctx, "entry", F); - Builder.SetInsertPoint(BB); - - FTy = FunctionType::get(Builder.getInt32Ty(), - PointerType::get(FTy, 0), false); - Constant *AtExitFn = M->getOrInsertFunction("atexit", FTy); - Builder.CreateCall(AtExitFn, WriteoutF); Builder.CreateRetVoid(); - - appendToGlobalCtors(*M, F, 0); + return WriteoutF; } void GCOVProfiler::insertIndirectCounterIncrement() { @@ -715,7 +765,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Fn->setUnnamedAddr(true); Fn->setLinkage(GlobalValue::InternalLinkage); Fn->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) Fn->addFnAttr(Attribute::NoRedZone); // Create basic blocks for function. @@ -760,18 +810,18 @@ void GCOVProfiler::insertIndirectCounterIncrement() { Builder.CreateRetVoid(); } -void GCOVProfiler:: +Function *GCOVProfiler:: insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__gcov_flush"); + Function *FlushF = M->getFunction("__llvm_gcov_flush"); if (!FlushF) FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__gcov_flush", M); + "__llvm_gcov_flush", M); else FlushF->setLinkage(GlobalValue::InternalLinkage); FlushF->setUnnamedAddr(true); FlushF->addFnAttr(Attribute::NoInline); - if (NoRedZone) + if (Options.NoRedZone) FlushF->addFnAttr(Attribute::NoRedZone); BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); @@ -796,8 +846,10 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) { if (RetTy == Type::getVoidTy(*Ctx)) Builder.CreateRetVoid(); else if (RetTy->isIntegerTy()) - // Used if __gcov_flush was implicitly declared. + // Used if __llvm_gcov_flush was implicitly declared. Builder.CreateRet(ConstantInt::get(RetTy, 0)); else - report_fatal_error("invalid return type for __gcov_flush"); + report_fatal_error("invalid return type for __llvm_gcov_flush"); + + return FlushF; } diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index fce6513a97..4e75904ded 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -122,6 +122,9 @@ static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call", static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern", cl::desc("poison uninitialized stack variables with the given patter"), cl::Hidden, cl::init(0xff)); +static cl::opt<bool> ClPoisonUndef("msan-poison-undef", + cl::desc("poison undef temps"), + cl::Hidden, cl::init(true)); static cl::opt<bool> ClHandleICmp("msan-handle-icmp", cl::desc("propagate shadow through ICmpEQ and ICmpNE"), @@ -690,7 +693,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// /// Clean shadow (all zeroes) means all bits of the value are defined /// (initialized). - Value *getCleanShadow(Value *V) { + Constant *getCleanShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) return 0; @@ -709,6 +712,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return ConstantStruct::get(ST, Vals); } + /// \brief Create a dirty shadow for a given value. + Constant *getPoisonedShadow(Value *V) { + Type *ShadowTy = getShadowTy(V); + if (!ShadowTy) + return 0; + return getPoisonedShadow(ShadowTy); + } + /// \brief Create a clean (zero) origin. Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); @@ -730,7 +741,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return Shadow; } if (UndefValue *U = dyn_cast<UndefValue>(V)) { - Value *AllOnes = getPoisonedShadow(getShadowTy(V)); + Value *AllOnes = ClPoisonUndef ? getPoisonedShadow(V) : getCleanShadow(V); DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n"); (void)U; return AllOnes; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index f93c5ab4c8..299060a42f 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -56,6 +57,9 @@ static cl::opt<bool> ClInstrumentFuncEntryExit( static cl::opt<bool> ClInstrumentAtomics( "tsan-instrument-atomics", cl::init(true), cl::desc("Instrument atomics"), cl::Hidden); +static cl::opt<bool> ClInstrumentMemIntrinsics( + "tsan-instrument-memintrinsics", cl::init(true), + cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); @@ -63,6 +67,7 @@ STATISTIC(NumOmittedReadsBeforeWrite, "Number of reads ignored due to following writes"); STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); STATISTIC(NumInstrumentedVtableWrites, "Number of vtable ptr writes"); +STATISTIC(NumInstrumentedVtableReads, "Number of vtable ptr reads"); STATISTIC(NumOmittedReadsFromConstantGlobals, "Number of reads from constant globals"); STATISTIC(NumOmittedReadsFromVtable, "Number of vtable reads"); @@ -85,12 +90,14 @@ struct ThreadSanitizer : public FunctionPass { void initializeCallbacks(Module &M); bool instrumentLoadOrStore(Instruction *I); bool instrumentAtomic(Instruction *I); + bool instrumentMemIntrinsic(Instruction *I); void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local, SmallVectorImpl<Instruction*> &All); bool addrPointsToConstantData(Value *Addr); int getMemoryAccessFuncIndex(Value *Addr); DataLayout *TD; + Type *IntptrTy; SmallString<64> BlacklistFile; OwningPtr<BlackList> BL; IntegerType *OrdTy; @@ -108,6 +115,8 @@ struct ThreadSanitizer : public FunctionPass { Function *TsanAtomicThreadFence; Function *TsanAtomicSignalFence; Function *TsanVptrUpdate; + Function *TsanVptrLoad; + Function *MemmoveFn, *MemcpyFn, *MemsetFn; }; } // namespace @@ -196,10 +205,22 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { TsanVptrUpdate = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_vptr_update", IRB.getVoidTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), NULL)); + TsanVptrLoad = checkInterfaceFunction(M.getOrInsertFunction( + "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL)); TsanAtomicThreadFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL)); TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction( "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL)); + + MemmoveFn = checkInterfaceFunction(M.getOrInsertFunction( + "memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IntptrTy, NULL)); + MemcpyFn = checkInterfaceFunction(M.getOrInsertFunction( + "memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy, NULL)); + MemsetFn = checkInterfaceFunction(M.getOrInsertFunction( + "memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), + IntptrTy, NULL)); } bool ThreadSanitizer::doInitialization(Module &M) { @@ -210,6 +231,7 @@ bool ThreadSanitizer::doInitialization(Module &M) { // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); + IntptrTy = IRB.getIntPtrTy(TD); Value *TsanInit = M.getOrInsertFunction("__tsan_init", IRB.getVoidTy(), NULL); appendToGlobalCtors(M, cast<Function>(TsanInit), 0); @@ -309,6 +331,7 @@ bool ThreadSanitizer::runOnFunction(Function &F) { SmallVector<Instruction*, 8> AllLoadsAndStores; SmallVector<Instruction*, 8> LocalLoadsAndStores; SmallVector<Instruction*, 8> AtomicAccesses; + SmallVector<Instruction*, 8> MemIntrinCalls; bool Res = false; bool HasCalls = false; @@ -325,6 +348,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { else if (isa<ReturnInst>(BI)) RetVec.push_back(BI); else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { + if (isa<MemIntrinsic>(BI)) + MemIntrinCalls.push_back(BI); HasCalls = true; chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } @@ -348,6 +373,11 @@ bool ThreadSanitizer::runOnFunction(Function &F) { Res |= instrumentAtomic(AtomicAccesses[i]); } + if (ClInstrumentMemIntrinsics) + for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) { + Res |= instrumentMemIntrinsic(MemIntrinCalls[i]); + } + // Instrument function entry/exit points if there were instrumented accesses. if ((Res || HasCalls) && ClInstrumentFuncEntryExit) { IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI()); @@ -386,6 +416,12 @@ bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) { NumInstrumentedVtableWrites++; return true; } + if (!IsWrite && isVtableAccess(I)) { + IRB.CreateCall(TsanVptrLoad, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); + NumInstrumentedVtableReads++; + return true; + } Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx]; IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy())); if (IsWrite) NumInstrumentedWrites++; @@ -423,6 +459,32 @@ static ConstantInt *createFailOrdering(IRBuilder<> *IRB, AtomicOrdering ord) { return IRB->getInt32(v); } +// If a memset intrinsic gets inlined by the code gen, we will miss races on it. +// So, we either need to ensure the intrinsic is not inlined, or instrument it. +// We do not instrument memset/memmove/memcpy intrinsics (too complicated), +// instead we simply replace them with regular function calls, which are then +// intercepted by the run-time. +// Since tsan is running after everyone else, the calls should not be +// replaced back with intrinsics. If that becomes wrong at some point, +// we will need to call e.g. __tsan_memset to avoid the intrinsics. +bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) { + IRBuilder<> IRB(I); + if (MemSetInst *M = dyn_cast<MemSetInst>(I)) { + IRB.CreateCall3(MemsetFn, + IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)); + I->eraseFromParent(); + } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) { + IRB.CreateCall3(isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn, + IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)); + I->eraseFromParent(); + } + return false; +} + // Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x // standards. For background see C++11 standard. A slightly older, publically // available draft of the standard (not entirely up-to-date, but close enough diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 5aada9c373..8f917aeb37 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -38,6 +38,7 @@ llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, switch (Class) { case IC_Autorelease: case IC_AutoreleaseRV: + case IC_IntrinsicUser: case IC_User: // These operations never directly modify a reference count. return false; diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index e062b66555..39670f339e 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -64,7 +64,8 @@ static inline bool ModuleHasARC(const Module &M) { M.getNamedValue("objc_copyWeak") || M.getNamedValue("objc_retainedObject") || M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); + M.getNamedValue("objc_unretainedPointer") || + M.getNamedValue("clang.arc.use"); } /// \enum InstructionClass @@ -89,6 +90,7 @@ enum InstructionClass { IC_CopyWeak, ///< objc_copyWeak (derived) IC_DestroyWeak, ///< objc_destroyWeak (derived) IC_StoreStrong, ///< objc_storeStrong (derived) + IC_IntrinsicUser, ///< clang.arc.use IC_CallOrUser, ///< could call objc_release and/or "use" pointers IC_Call, ///< could call objc_release IC_User, ///< could "use" a pointer @@ -97,6 +99,13 @@ enum InstructionClass { raw_ostream &operator<<(raw_ostream &OS, const InstructionClass Class); +/// \brief Test if the given class is a kind of user. +inline static bool IsUser(InstructionClass Class) { + return Class == IC_User || + Class == IC_CallOrUser || + Class == IC_IntrinsicUser; +} + /// \brief Test if the given class is objc_retain or equivalent. static inline bool IsRetain(InstructionClass Class) { return Class == IC_Retain || @@ -112,13 +121,10 @@ static inline bool IsAutorelease(InstructionClass Class) { /// \brief Test if the given class represents instructions which return their /// argument verbatim. static inline bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. return Class == IC_Retain || Class == IC_RetainRV || Class == IC_Autorelease || Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || Class == IC_NoopCast; } @@ -256,11 +262,11 @@ static inline Value *GetObjCArg(Value *Inst) { return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); } -static inline bool isNullOrUndef(const Value *V) { +static inline bool IsNullOrUndef(const Value *V) { return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); } -static inline bool isNoopInstruction(const Instruction *I) { +static inline bool IsNoopInstruction(const Instruction *I) { return isa<BitCastInst>(I) || (isa<GetElementPtrInst>(I) && cast<GetElementPtrInst>(I)->hasAllZeroIndices()); diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 1c13d1cbea..b96c64fe81 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -410,7 +410,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { break; } --BBI; - } while (isNoopInstruction(BBI)); + } while (IsNoopInstruction(BBI)); if (&*BBI == GetObjCArg(Inst)) { DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for " @@ -429,7 +429,7 @@ bool ObjCARCContract::runOnFunction(Function &F) { case IC_InitWeak: { // objc_initWeak(p, null) => *p = null CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { + if (IsNullOrUndef(CI->getArgOperand(1))) { Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType())); Changed = true; @@ -453,6 +453,10 @@ bool ObjCARCContract::runOnFunction(Function &F) { if (isa<AllocaInst>(Inst)) TailOkForStoreStrongs = false; continue; + case IC_IntrinsicUser: + // Remove calls to @clang.arc.use(...). + Inst->eraseFromParent(); + continue; default: continue; } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 9c14949877..924fb0a9da 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -211,6 +212,9 @@ static bool DoesRetainableObjPtrEscape(const User *Ptr) { // These special functions make copies of their pointer arguments. return true; } + case IC_IntrinsicUser: + // Use by the use intrinsic is not an escape. + continue; case IC_User: case IC_None: // Use by an instruction which copies the value is an escape if the @@ -387,10 +391,6 @@ namespace { /// KnownSafe is true when either of these conditions is satisfied. bool KnownSafe; - /// True if the Calls are objc_retainBlock calls (as opposed to objc_retain - /// calls). - bool IsRetainBlock; - /// True of the objc_release calls are all marked with the "tail" keyword. bool IsTailCallRelease; @@ -407,9 +407,7 @@ namespace { SmallPtrSet<Instruction *, 2> ReverseInsertPts; RRInfo() : - KnownSafe(false), IsRetainBlock(false), - IsTailCallRelease(false), - ReleaseMetadata(0) {} + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0) {} void clear(); }; @@ -417,7 +415,6 @@ namespace { void RRInfo::clear() { KnownSafe = false; - IsRetainBlock = false; IsTailCallRelease = false; ReleaseMetadata = 0; Calls.clear(); @@ -451,11 +448,11 @@ namespace { KnownPositiveRefCount = true; } - void ClearRefCount() { + void ClearKnownPositiveRefCount() { KnownPositiveRefCount = false; } - bool IsKnownIncremented() const { + bool HasKnownPositiveRefCount() const { return KnownPositiveRefCount; } @@ -486,10 +483,6 @@ PtrState::Merge(const PtrState &Other, bool TopDown) { Seq = MergeSeqs(Seq, Other.Seq, TopDown); KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - // We can't merge a plain objc_retain with an objc_retainBlock. - if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) - Seq = S_None; - // If we're not in a sequence (anymore), drop all associated state. if (Seq == S_None) { Partial = false; @@ -698,6 +691,228 @@ void BBState::MergeSucc(const BBState &Other) { MI->second.Merge(PtrState(), /*TopDown=*/false); } +// Only enable ARC Annotations if we are building a debug version of +// libObjCARCOpts. +#ifndef NDEBUG +#define ARC_ANNOTATIONS +#endif + +// Define some macros along the lines of DEBUG and some helper functions to make +// it cleaner to create annotations in the source code and to no-op when not +// building in debug mode. +#ifdef ARC_ANNOTATIONS + +#include "llvm/Support/CommandLine.h" + +/// Enable/disable ARC sequence annotations. +static cl::opt<bool> +EnableARCAnnotations("enable-objc-arc-annotations", cl::init(false)); + +/// This function appends a unique ARCAnnotationProvenanceSourceMDKind id to an +/// instruction so that we can track backwards when post processing via the llvm +/// arc annotation processor tool. If the function is an +static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, + Value *Ptr) { + MDString *Hash = 0; + + // If pointer is a result of an instruction and it does not have a source + // MDNode it, attach a new MDNode onto it. If pointer is a result of + // an instruction and does have a source MDNode attached to it, return a + // reference to said Node. Otherwise just return 0. + if (Instruction *Inst = dyn_cast<Instruction>(Ptr)) { + MDNode *Node; + if (!(Node = Inst->getMetadata(NodeId))) { + // We do not have any node. Generate and attatch the hash MDString to the + // instruction. + + // We just use an MDString to ensure that this metadata gets written out + // of line at the module level and to provide a very simple format + // encoding the information herein. Both of these makes it simpler to + // parse the annotations by a simple external program. + std::string Str; + raw_string_ostream os(Str); + os << "(" << Inst->getParent()->getParent()->getName() << ",%" + << Inst->getName() << ")"; + + Hash = MDString::get(Inst->getContext(), os.str()); + Inst->setMetadata(NodeId, MDNode::get(Inst->getContext(),Hash)); + } else { + // We have a node. Grab its hash and return it. + assert(Node->getNumOperands() == 1 && + "An ARCAnnotationProvenanceSourceMDKind can only have 1 operand."); + Hash = cast<MDString>(Node->getOperand(0)); + } + } else if (Argument *Arg = dyn_cast<Argument>(Ptr)) { + std::string str; + raw_string_ostream os(str); + os << "(" << Arg->getParent()->getName() << ",%" << Arg->getName() + << ")"; + Hash = MDString::get(Arg->getContext(), os.str()); + } + + return Hash; +} + +static std::string SequenceToString(Sequence A) { + std::string str; + raw_string_ostream os(str); + os << A; + return os.str(); +} + +/// Helper function to change a Sequence into a String object using our overload +/// for raw_ostream so we only have printing code in one location. +static MDString *SequenceToMDString(LLVMContext &Context, + Sequence A) { + return MDString::get(Context, SequenceToString(A)); +} + +/// A simple function to generate a MDNode which describes the change in state +/// for Value *Ptr caused by Instruction *Inst. +static void AppendMDNodeToInstForPtr(unsigned NodeId, + Instruction *Inst, + Value *Ptr, + MDString *PtrSourceMDNodeID, + Sequence OldSeq, + Sequence NewSeq) { + MDNode *Node = 0; + Value *tmp[3] = {PtrSourceMDNodeID, + SequenceToMDString(Inst->getContext(), + OldSeq), + SequenceToMDString(Inst->getContext(), + NewSeq)}; + Node = MDNode::get(Inst->getContext(), + ArrayRef<Value*>(tmp, 3)); + + Inst->setMetadata(NodeId, Node); +} + +/// Add to the beginning of the basic block llvm.ptr.annotations which show the +/// state of a pointer at the entrance to a basic block. +static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, + Value *Ptr, Sequence Seq) { + Module *M = BB->getParent()->getParent(); + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = {I8XX, I8XX}; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), + ArrayRef<Type*>(Params, 2), + /*isVarArg=*/false); + Constant *Callee = M->getOrInsertFunction(Name, FTy); + + IRBuilder<> Builder(BB, BB->getFirstInsertionPt()); + + Value *PtrName; + StringRef Tmp = Ptr->getName(); + if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, + Tmp + "_STR"); + PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), Tmp); + } + + Value *S; + std::string SeqStr = SequenceToString(Seq); + if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, + SeqStr + "_STR"); + S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), SeqStr); + } + + Builder.CreateCall2(Callee, PtrName, S); +} + +/// Add to the end of the basic block llvm.ptr.annotations which show the state +/// of the pointer at the bottom of the basic block. +static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, + Value *Ptr, Sequence Seq) { + Module *M = BB->getParent()->getParent(); + LLVMContext &C = M->getContext(); + Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); + Type *I8XX = PointerType::getUnqual(I8X); + Type *Params[] = {I8XX, I8XX}; + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), + ArrayRef<Type*>(Params, 2), + /*isVarArg=*/false); + Constant *Callee = M->getOrInsertFunction(Name, FTy); + + IRBuilder<> Builder(BB, llvm::prior(BB->end())); + + Value *PtrName; + StringRef Tmp = Ptr->getName(); + if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, + Tmp + "_STR"); + PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), Tmp); + } + + Value *S; + std::string SeqStr = SequenceToString(Seq); + if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, + SeqStr + "_STR"); + S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, + cast<Constant>(ActualPtrName), SeqStr); + } + Builder.CreateCall2(Callee, PtrName, S); +} + +/// Adds a source annotation to pointer and a state change annotation to Inst +/// referencing the source annotation and the old/new state of pointer. +static void GenerateARCAnnotation(unsigned InstMDId, + unsigned PtrMDId, + Instruction *Inst, + Value *Ptr, + Sequence OldSeq, + Sequence NewSeq) { + if (EnableARCAnnotations) { + // First generate the source annotation on our pointer. This will return an + // MDString* if Ptr actually comes from an instruction implying we can put + // in a source annotation. If AppendMDNodeToSourcePtr returns 0 (i.e. NULL), + // then we know that our pointer is from an Argument so we put a reference + // to the argument number. + // + // The point of this is to make it easy for the + // llvm-arc-annotation-processor tool to cross reference where the source + // pointer is in the LLVM IR since the LLVM IR parser does not submit such + // information via debug info for backends to use (since why would anyone + // need such a thing from LLVM IR besides in non standard cases + // [i.e. this]). + MDString *SourcePtrMDNode = + AppendMDNodeToSourcePtr(PtrMDId, Ptr); + AppendMDNodeToInstForPtr(InstMDId, Inst, Ptr, SourcePtrMDNode, OldSeq, + NewSeq); + } +} + +// The actual interface for accessing the above functionality is defined via +// some simple macros which are defined below. We do this so that the user does +// not need to pass in what metadata id is needed resulting in cleaner code and +// additionally since it provides an easy way to conditionally no-op all +// annotation support in a non-debug build. + +/// Use this macro to annotate a sequence state change when processing +/// instructions bottom up, +#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) \ + GenerateARCAnnotation(ARCAnnotationBottomUpMDKind, \ + ARCAnnotationProvenanceSourceMDKind, (inst), \ + const_cast<Value*>(ptr), (old), (new)) +/// Use this macro to annotate a sequence state change when processing +/// instructions top down. +#define ANNOTATE_TOPDOWN(inst, ptr, old, new) \ + GenerateARCAnnotation(ARCAnnotationTopDownMDKind, \ + ARCAnnotationProvenanceSourceMDKind, (inst), \ + const_cast<Value*>(ptr), (old), (new)) + +#else // !ARC_ANNOTATION +// If annotations are off, noop. +#define ANNOTATE_BOTTOMUP(inst, ptr, old, new) +#define ANNOTATE_TOPDOWN(inst, ptr, old, new) +#endif // !ARC_ANNOTATION + namespace { /// \brief The main ARC optimization pass. class ObjCARCOpt : public FunctionPass { @@ -738,6 +953,15 @@ namespace { /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. unsigned NoObjCARCExceptionsMDKind; +#ifdef ARC_ANNOTATIONS + /// The Metadata Kind for llvm.arc.annotation.bottomup metadata. + unsigned ARCAnnotationBottomUpMDKind; + /// The Metadata Kind for llvm.arc.annotation.topdown metadata. + unsigned ARCAnnotationTopDownMDKind; + /// The Metadata Kind for llvm.arc.annotation.provenancesource metadata. + unsigned ARCAnnotationProvenanceSourceMDKind; +#endif // ARC_ANNOATIONS + Constant *getRetainRVCallee(Module *M); Constant *getAutoreleaseRVCallee(Module *M); Constant *getReleaseCallee(Module *M); @@ -751,6 +975,8 @@ namespace { bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, InstructionClass &Class); + bool OptimizeRetainBlockCall(Function &F, Instruction *RetainBlock, + InstructionClass &Class); void OptimizeIndividualCalls(Function &F); void CheckForCFGHazards(const BasicBlock *BB, @@ -958,7 +1184,7 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { // Check that the call is next to the retain. BasicBlock::const_iterator I = Call; ++I; - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I != Retain) return; @@ -990,14 +1216,14 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { if (Call->getParent() == RetainRV->getParent()) { BasicBlock::const_iterator I = Call; ++I; - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { BasicBlock *RetainRVParent = RetainRV->getParent(); if (II->getNormalDest() == RetainRVParent) { BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; + while (IsNoopInstruction(I)) ++I; if (&*I == RetainRV) return false; } @@ -1008,7 +1234,7 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // pointer. In this case, we can delete the pair. BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); + do --I; while (I != Begin && IsNoopInstruction(I)); if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && GetObjCArg(I) == Arg) { Changed = true; @@ -1084,6 +1310,35 @@ ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV, } +// \brief Attempt to strength reduce objc_retainBlock calls to objc_retain +// calls. +// +// Specifically: If an objc_retainBlock call has the copy_on_escape metadata and +// does not escape (following the rules of block escaping), strength reduce the +// objc_retainBlock to an objc_retain. +// +// TODO: If an objc_retainBlock call is dominated period by a previous +// objc_retainBlock call, strength reduce the objc_retainBlock to an +// objc_retain. +bool +ObjCARCOpt::OptimizeRetainBlockCall(Function &F, Instruction *Inst, + InstructionClass &Class) { + assert(GetBasicInstructionClass(Inst) == Class); + assert(IC_RetainBlock == Class); + + // If we can not optimize Inst, return false. + if (!IsRetainBlockOptimizable(Inst)) + return false; + + CallInst *RetainBlock = cast<CallInst>(Inst); + RetainBlock->setCalledFunction(getRetainCallee(F.getParent())); + // Remove copy_on_escape metadata. + RetainBlock->setMetadata(CopyOnEscapeMDKind, 0); + Class = IC_Retain; + + return true; +} + /// Visit each call, one at a time, and make simplifications without doing any /// additional analysis. void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { @@ -1125,7 +1380,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_InitWeak: case IC_DestroyWeak: { CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { + if (IsNullOrUndef(CI->getArgOperand(0))) { Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), @@ -1146,8 +1401,8 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { case IC_CopyWeak: case IC_MoveWeak: { CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { + if (IsNullOrUndef(CI->getArgOperand(0)) || + IsNullOrUndef(CI->getArgOperand(1))) { Changed = true; Type *Ty = CI->getArgOperand(0)->getType(); new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), @@ -1167,6 +1422,12 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { } break; } + case IC_RetainBlock: + // If we strength reduce an objc_retainBlock to amn objc_retain, continue + // onto the objc_retain peephole optimizations. Otherwise break. + if (!OptimizeRetainBlockCall(F, Inst, Class)) + break; + // FALLTHROUGH case IC_Retain: OptimizeRetainCall(F, Inst); break; @@ -1245,7 +1506,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { const Value *Arg = GetObjCArg(Inst); // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { + if (IsNullOrUndef(Arg)) { Changed = true; ++NumNoops; DEBUG(dbgs() << "ObjCARCOpt::OptimizeIndividualCalls: ARC calls with " @@ -1280,7 +1541,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) + if (IsNullOrUndef(Incoming)) HasNull = true; else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) .getNumSuccessors() != 1) { @@ -1334,7 +1595,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { + if (!IsNullOrUndef(Incoming)) { CallInst *Clone = cast<CallInst>(CInst->clone()); Value *Op = PN->getIncomingValue(i); Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); @@ -1502,21 +1763,21 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, } MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); + Sequence NewSeq = ReleaseMetadata ? S_MovableRelease : S_Release; + ANNOTATE_BOTTOMUP(Inst, Arg, S.GetSeq(), NewSeq); + S.ResetSequenceProgress(NewSeq); S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.HasKnownPositiveRefCount(); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); S.RRI.Calls.insert(Inst); - S.SetKnownPositiveRefCount(); break; } case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH + // In OptimizeIndividualCalls, we have strength reduced all optimizable + // objc_retainBlocks to objc_retains. Thus at this point any + // objc_retainBlocks that we see are not optimizable. + break; case IC_Retain: case IC_RetainRV: { Arg = GetObjCArg(Inst); @@ -1524,7 +1785,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, PtrState &S = MyStates.getPtrBottomUpState(Arg); S.SetKnownPositiveRefCount(); - switch (S.GetSeq()) { + Sequence OldSeq = S.GetSeq(); + switch (OldSeq) { case S_Stop: case S_Release: case S_MovableRelease: @@ -1534,10 +1796,8 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_CanRelease: // Don't do retain+release tracking for IC_RetainRV, because it's // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; + if (Class != IC_RetainRV) Retains[Inst] = S.RRI; - } S.ClearSequenceProgress(); break; case S_None: @@ -1545,6 +1805,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, case S_Retain: llvm_unreachable("bottom-up pointer in retain state!"); } + ANNOTATE_BOTTOMUP(Inst, Arg, OldSeq, S.GetSeq()); return NestingDetected; } case IC_AutoreleasepoolPop: @@ -1571,10 +1832,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Use: S.SetSeq(S_CanRelease); + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S.GetSeq()); continue; case S_CanRelease: case S_Release: @@ -1601,10 +1863,11 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, else S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); + } else if (Seq == S_Release && IsUser(Class)) { // Non-movable releases depend on any possible objc pointer use. S.SetSeq(S_Stop); + ANNOTATE_BOTTOMUP(Inst, Ptr, S_Release, S_Stop); assert(S.RRI.ReverseInsertPts.empty()); // As above; handle invoke specially. if (isa<InvokeInst>(Inst)) @@ -1614,8 +1877,10 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, } break; case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) + if (CanUse(Inst, Ptr, PA, Class)) { S.SetSeq(S_Use); + ANNOTATE_BOTTOMUP(Inst, Ptr, Seq, S_Use); + } break; case S_CanRelease: case S_Use: @@ -1654,6 +1919,21 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, } } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // bottom of the basic block. + for(BBState::ptr_const_iterator I = MyStates.bottom_up_ptr_begin(), + E = MyStates.bottom_up_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBTerminatorAnnotation("llvm.arc.annotation.bottomup.bbend", + BB, Ptr, Seq); + } + } +#endif + + // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { Instruction *Inst = llvm::prior(I); @@ -1677,6 +1957,20 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB, NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // top of the basic block. + for(BBState::ptr_const_iterator I = MyStates.bottom_up_ptr_begin(), + E = MyStates.bottom_up_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBEntranceAnnotation("llvm.arc.annotation.bottomup.bbstart", + BB, Ptr, Seq); + } + } +#endif + return NestingDetected; } @@ -1690,11 +1984,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (Class) { case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH + // In OptimizeIndividualCalls, we have strength reduced all optimizable + // objc_retainBlocks to objc_retains. Thus at this point any + // objc_retainBlocks that we see are not optimizable. + break; case IC_Retain: case IC_RetainRV: { Arg = GetObjCArg(Inst); @@ -1714,9 +2007,9 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, if (S.GetSeq() == S_Retain) NestingDetected = true; + ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_Retain); S.ResetSequenceProgress(S_Retain); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownSafe = S.IsKnownIncremented(); + S.RRI.KnownSafe = S.HasKnownPositiveRefCount(); S.RRI.Calls.insert(Inst); } @@ -1730,7 +2023,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetObjCArg(Inst); PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); switch (S.GetSeq()) { case S_Retain: @@ -1741,6 +2034,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); Releases[Inst] = S.RRI; + ANNOTATE_TOPDOWN(Inst, Arg, S.GetSeq(), S_None); S.ClearSequenceProgress(); break; case S_None: @@ -1776,10 +2070,11 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible releases. if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); + S.ClearKnownPositiveRefCount(); switch (Seq) { case S_Retain: S.SetSeq(S_CanRelease); + ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_CanRelease); assert(S.RRI.ReverseInsertPts.empty()); S.RRI.ReverseInsertPts.insert(Inst); @@ -1801,8 +2096,10 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, // Check for possible direct uses. switch (Seq) { case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) + if (CanUse(Inst, Ptr, PA, Class)) { S.SetSeq(S_Use); + ANNOTATE_TOPDOWN(Inst, Ptr, Seq, S_Use); + } break; case S_Retain: case S_Use: @@ -1843,6 +2140,20 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, } } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // top of the basic block. + for(BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBEntranceAnnotation("llvm.arc.annotation.topdown.bbstart", + BB, Ptr, Seq); + } + } +#endif + // Visit all the instructions, top-down. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { Instruction *Inst = I; @@ -1852,6 +2163,20 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); } +#ifdef ARC_ANNOTATIONS + if (EnableARCAnnotations) { + // If ARC Annotations are enabled, output the current state of pointers at the + // bottom of the basic block. + for(BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(), + E = MyStates.top_down_ptr_end(); I != E; ++I) { + Value *Ptr = const_cast<Value*>(I->first); + Sequence Seq = I->second.GetSeq(); + GenerateARCBBTerminatorAnnotation("llvm.arc.annotation.topdown.bbend", + BB, Ptr, Seq); + } + } +#endif + CheckForCFGHazards(BB, BBStates, MyStates); return NestingDetected; } @@ -1991,15 +2316,9 @@ void ObjCARCOpt::MoveCalls(Value *Arg, Value *MyArg = ArgTy == ParamTy ? Arg : new BitCastInst(Arg, ParamTy, "", InsertPt); CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? - getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); + CallInst::Create(getRetainCallee(M), MyArg, "", InsertPt); Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef<Value *>())); - else - Call->setTailCall(); + Call->setTailCall(); DEBUG(dbgs() << "ObjCARCOpt::MoveCalls: Inserting new Release: " << *Call << "\n" @@ -2075,7 +2394,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> unsigned OldCount = 0; unsigned NewCount = 0; bool FirstRelease = true; - bool FirstRetain = true; for (;;) { for (SmallVectorImpl<Instruction *>::const_iterator NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { @@ -2156,16 +2474,6 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState> OldDelta += PathCount; OldCount += PathCount; - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - return false; - // Collect the optimal insertion points. if (!KnownSafe) for (SmallPtrSet<Instruction *, 2>::const_iterator @@ -2271,6 +2579,12 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> ReleasesToMove, Arg, KnownSafe, AnyPairsCompletelyEliminated); +#ifdef ARC_ANNOTATIONS + // Do not move calls if ARC annotations are requested. If we were to move + // calls in this case, we would not be able + PerformMoveCalls = PerformMoveCalls && !EnableARCAnnotations; +#endif // ARC_ANNOTATIONS + if (PerformMoveCalls) { // Ok, everything checks out and we're all set. Let's move/delete some // code! @@ -2392,6 +2706,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { goto clobbered; case IC_AutoreleasepoolPush: case IC_None: + case IC_IntrinsicUser: case IC_User: // Weak pointers are only modified through the weak entry points // (and arbitrary calls, which could call the weak entry points). @@ -2617,6 +2932,14 @@ bool ObjCARCOpt::doInitialization(Module &M) { M.getContext().getMDKindID("clang.arc.copy_on_escape"); NoObjCARCExceptionsMDKind = M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); +#ifdef ARC_ANNOTATIONS + ARCAnnotationBottomUpMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.bottomup"); + ARCAnnotationTopDownMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.topdown"); + ARCAnnotationProvenanceSourceMDKind = + M.getContext().getMDKindID("llvm.arc.annotation.provenancesource"); +#endif // ARC_ANNOTATIONS // Intuitively, objc_retain and others are nocapture, however in practice // they are not, because they return their argument value. And objc_release diff --git a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp index a841c64a9f..03e12d4fd7 100644 --- a/lib/Transforms/ObjCARC/ObjCARCUtil.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCUtil.cpp @@ -72,6 +72,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, return OS << "IC_Call"; case IC_User: return OS << "IC_User"; + case IC_IntrinsicUser: + return OS << "IC_IntrinsicUser"; case IC_None: return OS << "IC_None"; } @@ -81,10 +83,11 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - // No arguments. + // No (mandatory) arguments. if (AI == AE) return StringSwitch<InstructionClass>(F->getName()) .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) + .Case("clang.arc.use", IC_IntrinsicUser) .Default(IC_CallOrUser); // One argument. @@ -142,6 +145,14 @@ InstructionClass llvm::objcarc::GetFunctionClass(const Function *F) { return StringSwitch<InstructionClass>(F->getName()) .Case("objc_moveWeak", IC_MoveWeak) .Case("objc_copyWeak", IC_CopyWeak) + // Ignore annotation calls. This is important to stop the + // optimizer from treating annotations as uses which would + // make the state of the pointers they are attempting to + // elucidate to be incorrect. + .Case("llvm.arc.annotation.topdown.bbstart", IC_None) + .Case("llvm.arc.annotation.topdown.bbend", IC_None) + .Case("llvm.arc.annotation.bottomup.bbstart", IC_None) + .Case("llvm.arc.annotation.bottomup.bbend", IC_None) .Default(IC_CallOrUser); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c04b447f1c..129af8d45d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1714,7 +1714,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return true; } -static void patchReplacementInstruction(Value *Repl, Instruction *I) { +static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. BinaryOperator *Op = dyn_cast<BinaryOperator>(I); @@ -1756,8 +1756,8 @@ static void patchReplacementInstruction(Value *Repl, Instruction *I) { } } -static void patchAndReplaceAllUsesWith(Value *Repl, Instruction *I) { - patchReplacementInstruction(Repl, I); +static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) { + patchReplacementInstruction(I, Repl); I->replaceAllUsesWith(Repl); } @@ -1919,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { } // Remove it! - patchAndReplaceAllUsesWith(AvailableVal, L); + patchAndReplaceAllUsesWith(L, AvailableVal); if (DepLI->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); markInstructionForDeletion(L); @@ -2260,7 +2260,7 @@ bool GVN::processInstruction(Instruction *I) { } // Remove it! - patchAndReplaceAllUsesWith(repl, I); + patchAndReplaceAllUsesWith(I, repl); if (MD && repl->getType()->getScalarType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); markInstructionForDeletion(I); diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 1601a8d646..5d02c68a7a 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -53,6 +53,7 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -64,10 +65,16 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -78,6 +85,23 @@ namespace { bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that must not be merged away + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + public: static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetLowering *tli = 0) @@ -87,6 +111,7 @@ namespace { virtual bool doInitialization(Module &M); virtual bool runOnFunction(Function &F); + virtual bool doFinalization(Module &M); const char *getPassName() const { return "Merge internal globals"; @@ -169,6 +194,43 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, return true; } +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. + const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); + if (InitList == 0) return; + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the inwoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} bool GlobalMerge::doInitialization(Module &M) { DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, @@ -176,6 +238,7 @@ bool GlobalMerge::doInitialization(Module &M) { const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; + setMustKeepGlobalVariables(M); // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), @@ -200,6 +263,10 @@ bool GlobalMerge::doInitialization(Module &M) { I->getName().startswith(".llvm.")) continue; + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(I)) + continue; + if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) @@ -221,11 +288,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (I->second.size() > 1) Changed |= doMerge(I->second, M, false, I->first); - // FIXME: This currently breaks the EH processing due to way how the - // typeinfo detection works. We might want to detect the TIs and ignore - // them in the future. - // if (ConstGlobals.size() > 1) - // Changed |= doMerge(ConstGlobals, M, true); + if (EnableGlobalMergeOnConst) + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, true, I->first); return Changed; } @@ -234,6 +301,11 @@ bool GlobalMerge::runOnFunction(Function &F) { return false; } +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + Pass *llvm::createGlobalMergePass(const TargetLowering *tli) { return new GlobalMerge(tli); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 97fff7e782..8e76c78f5a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -535,6 +535,45 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; + // Computing the value outside of the loop brings no benefit if : + // - it is definitely used inside the loop in a way which can not be + // optimized away. + // - no use outside of the loop can take advantage of hoisting the + // computation out of the loop + if (ExitValue->getSCEVType()>=scMulExpr) { + unsigned NumHardInternalUses = 0; + unsigned NumSoftExternalUses = 0; + unsigned NumUses = 0; + for (Value::use_iterator IB=Inst->use_begin(), IE=Inst->use_end(); + IB!=IE && NumUses<=6 ; ++IB) { + Instruction *UseInstr = cast<Instruction>(*IB); + unsigned Opc = UseInstr->getOpcode(); + NumUses++; + if (L->contains(UseInstr)) { + if (Opc == Instruction::Call || Opc == Instruction::Ret) + NumHardInternalUses++; + } else { + if (Opc == Instruction::PHI) { + // Do not count the Phi as a use. LCSSA may have inserted + // plenty of trivial ones. + NumUses--; + for (Value::use_iterator PB=UseInstr->use_begin(), + PE=UseInstr->use_end(); + PB!=PE && NumUses<=6 ; ++PB, ++NumUses) { + unsigned PhiOpc = cast<Instruction>(*PB)->getOpcode(); + if (PhiOpc != Instruction::Call && PhiOpc != Instruction::Ret) + NumSoftExternalUses++; + } + continue; + } + if (Opc != Instruction::Call && Opc != Instruction::Ret) + NumSoftExternalUses++; + } + } + if (NumUses <= 6 && NumHardInternalUses && !NumSoftExternalUses) + continue; + } + Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9c67e327e2..0b62050b17 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -34,13 +34,9 @@ namespace { } // Possibly eliminate loop L if it is dead. - bool runOnLoop(Loop* L, LPPassManager& LPM); + bool runOnLoop(Loop *L, LPPassManager &LPM); - bool IsLoopDead(Loop* L, SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, - bool &Changed, BasicBlock *Preheader); - - virtual void getAnalysisUsage(AnalysisUsage& AU) const { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<DominatorTree>(); AU.addRequired<LoopInfo>(); AU.addRequired<ScalarEvolution>(); @@ -53,6 +49,12 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); } + + private: + bool isLoopDead(Loop *L, SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, + bool &Changed, BasicBlock *Preheader); + }; } @@ -67,18 +69,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) -Pass* llvm::createLoopDeletionPass() { +Pass *llvm::createLoopDeletionPass() { return new LoopDeletion(); } -/// IsLoopDead - Determined if a loop is dead. This assumes that we've already +/// isLoopDead - Determined if a loop is dead. This assumes that we've already /// checked for unique exit and exiting blocks, and that the code is in LCSSA /// form. -bool LoopDeletion::IsLoopDead(Loop* L, - SmallVector<BasicBlock*, 4>& exitingBlocks, - SmallVector<BasicBlock*, 4>& exitBlocks, +bool LoopDeletion::isLoopDead(Loop *L, + SmallVector<BasicBlock*, 4> &exitingBlocks, + SmallVector<BasicBlock*, 4> &exitBlocks, bool &Changed, BasicBlock *Preheader) { - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Make sure that all PHI entries coming from the loop are loop invariant. // Because the code is in LCSSA form, any values used outside of the loop @@ -86,19 +88,19 @@ bool LoopDeletion::IsLoopDead(Loop* L, // sufficient to guarantee that no loop-variant values are used outside // of the loop. BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { - Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + while (PHINode *P = dyn_cast<PHINode>(BI)) { + Value *incoming = P->getIncomingValueForBlock(exitingBlocks[0]); // Make sure all exiting blocks produce the same incoming value for the exit // block. If there are different incoming values for different exiting // blocks, then it is impossible to statically determine which value should // be used. - for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + for (unsigned i = 1, e = exitingBlocks.size(); i < e; ++i) { if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) return false; } - if (Instruction* I = dyn_cast<Instruction>(incoming)) + if (Instruction *I = dyn_cast<Instruction>(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; @@ -127,10 +129,10 @@ bool LoopDeletion::IsLoopDead(Loop* L, /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. -bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { +bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // We can only remove the loop if there is a preheader that we can // branch from after removing it. - BasicBlock* preheader = L->getLoopPreheader(); + BasicBlock *preheader = L->getLoopPreheader(); if (!preheader) return false; @@ -158,19 +160,19 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) + if (!isLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) return Changed; // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - ScalarEvolution& SE = getAnalysis<ScalarEvolution>(); + ScalarEvolution &SE = getAnalysis<ScalarEvolution>(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(S)) return Changed; // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. - BasicBlock* exitBlock = exitBlocks[0]; + BasicBlock *exitBlock = exitBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't @@ -182,14 +184,14 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { SE.forgetLoop(L); // Connect the preheader directly to the exit block. - TerminatorInst* TI = preheader->getTerminator(); + TerminatorInst *TI = preheader->getTerminator(); TI->replaceUsesOfWith(L->getHeader(), exitBlock); // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. - BasicBlock* exitingBlock = exitingBlocks[0]; + BasicBlock *exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); - while (PHINode* P = dyn_cast<PHINode>(BI)) { + while (PHINode *P = dyn_cast<PHINode>(BI)) { int j = P->getBasicBlockIndex(exitingBlock); assert(j >= 0 && "Can't find exiting block in exit block's phi node!"); P->setIncomingBlock(j, preheader); @@ -200,7 +202,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Update the dominator tree and remove the instructions and blocks that will // be deleted from the reference counting scheme. - DominatorTree& DT = getAnalysis<DominatorTree>(); + DominatorTree &DT = getAnalysis<DominatorTree>(); SmallVector<DomTreeNode*, 8> ChildNodes; for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); LI != LE; ++LI) { @@ -230,7 +232,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Finally, the blocks from loopinfo. This has to happen late because // otherwise our loop iterators won't work. - LoopInfo& loopInfo = getAnalysis<LoopInfo>(); + LoopInfo &loopInfo = getAnalysis<LoopInfo>(); SmallPtrSet<BasicBlock*, 8> blocks; blocks.insert(L->block_begin(), L->block_end()); for (SmallPtrSet<BasicBlock*,8>::iterator I = blocks.begin(), diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4e4cb86464..73e44d7edf 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -895,7 +895,7 @@ void Cost::RatePrimaryRegister(const SCEV *Reg, } if (Regs.insert(Reg)) { RateRegister(Reg, Regs, L, SE, DT); - if (isLoser()) + if (LoserRegs && isLoser()) LoserRegs->insert(Reg); } } @@ -1895,15 +1895,13 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) - if (isa<ConstantInt>(BO->getOperand(1)) && - cast<ConstantInt>(BO->getOperand(1))->isOne() && - SE.getSCEV(BO->getOperand(0)) == MaxRHS) - NewRHS = BO->getOperand(0); + if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) + if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) + NewRHS = BO->getOperand(0); if (!NewRHS) return Cond; } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) @@ -2716,6 +2714,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // by LSR. const IVInc &Head = Chain.Incs[0]; User::op_iterator IVOpEnd = Head.UserInst->op_end(); + // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); Value *IVSrc = 0; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 0da3746950..1f343136e5 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -110,6 +110,51 @@ namespace { } }; }; + + /// Utility class representing a non-constant Xor-operand. We classify + /// non-constant Xor-Operands into two categories: + /// C1) The operand is in the form "X & C", where C is a constant and C != ~0 + /// C2) + /// C2.1) The operand is in the form of "X | C", where C is a non-zero + /// constant. + /// C2.2) Any operand E which doesn't fall into C1 and C2.1, we view this + /// operand as "E | 0" + class XorOpnd { + public: + XorOpnd(Value *V); + const XorOpnd &operator=(const XorOpnd &That); + + bool isInvalid() const { return SymbolicPart == 0; } + bool isOrExpr() const { return isOr; } + Value *getValue() const { return OrigVal; } + Value *getSymbolicPart() const { return SymbolicPart; } + unsigned getSymbolicRank() const { return SymbolicRank; } + const APInt &getConstPart() const { return ConstPart; } + + void Invalidate() { SymbolicPart = OrigVal = 0; } + void setSymbolicRank(unsigned R) { SymbolicRank = R; } + + // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. + // The purpose is twofold: + // 1) Cluster together the operands sharing the same symbolic-value. + // 2) Operand having smaller symbolic-value-rank is permuted earlier, which + // could potentially shorten crital path, and expose more loop-invariants. + // Note that values' rank are basically defined in RPO order (FIXME). + // So, if Rank(X) < Rank(Y) < Rank(Z), it means X is defined earlier + // than Y which is defined earlier than Z. Permute "x | 1", "Y & 2", + // "z" in the order of X-Y-Z is better than any other orders. + struct PtrSortFunctor { + bool operator()(XorOpnd * const &LHS, XorOpnd * const &RHS) { + return LHS->getSymbolicRank() < RHS->getSymbolicRank(); + } + }; + private: + Value *OrigVal; + Value *SymbolicPart; + APInt ConstPart; + unsigned SymbolicRank; + bool isOr; + }; } namespace { @@ -137,6 +182,11 @@ namespace { Value *OptimizeExpression(BinaryOperator *I, SmallVectorImpl<ValueEntry> &Ops); Value *OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + Value *OptimizeXor(Instruction *I, SmallVectorImpl<ValueEntry> &Ops); + bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, APInt &ConstOpnd, + Value *&Res); + bool CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, + APInt &ConstOpnd, Value *&Res); bool collectMultiplyFactors(SmallVectorImpl<ValueEntry> &Ops, SmallVectorImpl<Factor> &Factors); Value *buildMinimalMultiplyDAG(IRBuilder<> &Builder, @@ -148,6 +198,42 @@ namespace { }; } +XorOpnd::XorOpnd(Value *V) { + assert(!isa<ConstantInt>(V) && "No ConstantInt"); + OrigVal = V; + Instruction *I = dyn_cast<Instruction>(V); + SymbolicRank = 0; + + if (I && (I->getOpcode() == Instruction::Or || + I->getOpcode() == Instruction::And)) { + Value *V0 = I->getOperand(0); + Value *V1 = I->getOperand(1); + if (isa<ConstantInt>(V0)) + std::swap(V0, V1); + + if (ConstantInt *C = dyn_cast<ConstantInt>(V1)) { + ConstPart = C->getValue(); + SymbolicPart = V0; + isOr = (I->getOpcode() == Instruction::Or); + return; + } + } + + // view the operand as "V | 0" + SymbolicPart = V; + ConstPart = APInt::getNullValue(V->getType()->getIntegerBitWidth()); + isOr = true; +} + +const XorOpnd &XorOpnd::operator=(const XorOpnd &That) { + OrigVal = That.OrigVal; + SymbolicPart = That.SymbolicPart; + ConstPart = That.ConstPart; + SymbolicRank = That.SymbolicRank; + isOr = That.isOr; + return *this; +} + char Reassociate::ID = 0; INITIALIZE_PASS(Reassociate, "reassociate", "Reassociate expressions", false, false) @@ -1040,6 +1126,240 @@ static Value *OptimizeAndOrXor(unsigned Opcode, return 0; } +/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and +/// instruction with the given two operands, and return the resulting +/// instruction. There are two special cases: 1) if the constant operand is 0, +/// it will return NULL. 2) if the constant is ~0, the symbolic operand will +/// be returned. +static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, + const APInt &ConstOpnd) { + if (ConstOpnd != 0) { + if (!ConstOpnd.isAllOnesValue()) { + LLVMContext &Ctx = Opnd->getType()->getContext(); + Instruction *I; + I = BinaryOperator::CreateAnd(Opnd, ConstantInt::get(Ctx, ConstOpnd), + "and.ra", InsertBefore); + I->setDebugLoc(InsertBefore->getDebugLoc()); + return I; + } + return Opnd; + } + return 0; +} + +// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" +// into "R ^ C", where C would be 0, and R is a symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned +// via "Res" and "ConstOpnd", respectively; otherwise, false is returned, +// and both "Res" and "ConstOpnd" remain unchanged. +// +bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, + APInt &ConstOpnd, Value *&Res) { + // Xor-Rule 1: (x | c1) ^ c2 = (x | c1) ^ (c1 ^ c1) ^ c2 + // = ((x | c1) ^ c1) ^ (c1 ^ c2) + // = (x & ~c1) ^ (c1 ^ c2) + // It is useful only when c1 == c2. + if (Opnd1->isOrExpr() && Opnd1->getConstPart() != 0) { + if (!Opnd1->getValue()->hasOneUse()) + return false; + + const APInt &C1 = Opnd1->getConstPart(); + if (C1 != ConstOpnd) + return false; + + Value *X = Opnd1->getSymbolicPart(); + Res = createAndInstr(I, X, ~C1); + // ConstOpnd was C2, now C1 ^ C2. + ConstOpnd ^= C1; + + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + return true; + } + return false; +} + + +// Helper function of OptimizeXor(). It tries to simplify +// "Opnd1 ^ Opnd2 ^ ConstOpnd" into "R ^ C", where C would be 0, and R is a +// symbolic value. +// +// If it was successful, true is returned, and the "R" and "C" is returned +// via "Res" and "ConstOpnd", respectively (If the entire expression is +// evaluated to a constant, the Res is set to NULL); otherwise, false is +// returned, and both "Res" and "ConstOpnd" remain unchanged. +bool Reassociate::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1, XorOpnd *Opnd2, + APInt &ConstOpnd, Value *&Res) { + Value *X = Opnd1->getSymbolicPart(); + if (X != Opnd2->getSymbolicPart()) + return false; + + const APInt &C1 = Opnd1->getConstPart(); + const APInt &C2 = Opnd2->getConstPart(); + + // This many instruction become dead.(At least "Opnd1 ^ Opnd2" will die.) + int DeadInstNum = 1; + if (Opnd1->getValue()->hasOneUse()) + DeadInstNum++; + if (Opnd2->getValue()->hasOneUse()) + DeadInstNum++; + + // Xor-Rule 2: + // (x | c1) ^ (x & c2) + // = (x|c1) ^ (x&c2) ^ (c1 ^ c1) = ((x|c1) ^ c1) ^ (x & c2) ^ c1 + // = (x & ~c1) ^ (x & c2) ^ c1 // Xor-Rule 1 + // = (x & c3) ^ c1, where c3 = ~c1 ^ c2 // Xor-rule 3 + // + if (Opnd1->isOrExpr() != Opnd2->isOrExpr()) { + if (Opnd2->isOrExpr()) + std::swap(Opnd1, Opnd2); + + APInt C3((~C1) ^ C2); + + // Do not increase code size! + if (C3 != 0 && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (NewInstNum > DeadInstNum) + return false; + } + + Res = createAndInstr(I, X, C3); + ConstOpnd ^= C1; + + } else if (Opnd1->isOrExpr()) { + // Xor-Rule 3: (x | c1) ^ (x | c2) = (x & c3) ^ c3 where c3 = c1 ^ c2 + // + APInt C3 = C1 ^ C2; + + // Do not increase code size + if (C3 != 0 && !C3.isAllOnesValue()) { + int NewInstNum = ConstOpnd != 0 ? 1 : 2; + if (NewInstNum > DeadInstNum) + return false; + } + + Res = createAndInstr(I, X, C3); + ConstOpnd ^= C3; + } else { + // Xor-Rule 4: (x & c1) ^ (x & c2) = (x & (c1^c2)) + // + APInt C3 = C1 ^ C2; + Res = createAndInstr(I, X, C3); + } + + // Put the original operands in the Redo list; hope they will be deleted + // as dead code. + if (Instruction *T = dyn_cast<Instruction>(Opnd1->getValue())) + RedoInsts.insert(T); + if (Instruction *T = dyn_cast<Instruction>(Opnd2->getValue())) + RedoInsts.insert(T); + + return true; +} + +/// Optimize a series of operands to an 'xor' instruction. If it can be reduced +/// to a single Value, it is returned, otherwise the Ops list is mutated as +/// necessary. +Value *Reassociate::OptimizeXor(Instruction *I, + SmallVectorImpl<ValueEntry> &Ops) { + if (Value *V = OptimizeAndOrXor(Instruction::Xor, Ops)) + return V; + + if (Ops.size() == 1) + return 0; + + SmallVector<XorOpnd, 8> Opnds; + SmallVector<XorOpnd*, 8> OpndPtrs; + Type *Ty = Ops[0].Op->getType(); + APInt ConstOpnd(Ty->getIntegerBitWidth(), 0); + + // Step 1: Convert ValueEntry to XorOpnd + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + Value *V = Ops[i].Op; + if (!isa<ConstantInt>(V)) { + XorOpnd O(V); + O.setSymbolicRank(getRank(O.getSymbolicPart())); + Opnds.push_back(O); + OpndPtrs.push_back(&Opnds.back()); + } else + ConstOpnd ^= cast<ConstantInt>(V)->getValue(); + } + + // Step 2: Sort the Xor-Operands in a way such that the operands containing + // the same symbolic value cluster together. For instance, the input operand + // sequence ("x | 123", "y & 456", "x & 789") will be sorted into: + // ("x | 123", "x & 789", "y & 456"). + std::sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); + + // Step 3: Combine adjacent operands + XorOpnd *PrevOpnd = 0; + bool Changed = false; + for (unsigned i = 0, e = Opnds.size(); i < e; i++) { + XorOpnd *CurrOpnd = OpndPtrs[i]; + // The combined value + Value *CV; + + // Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd" + if (ConstOpnd != 0 && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) { + Changed = true; + if (CV) + *CurrOpnd = XorOpnd(CV); + else { + CurrOpnd->Invalidate(); + continue; + } + } + + if (!PrevOpnd || CurrOpnd->getSymbolicPart() != PrevOpnd->getSymbolicPart()) { + PrevOpnd = CurrOpnd; + continue; + } + + // step 3.2: When previous and current operands share the same symbolic + // value, try to simplify "PrevOpnd ^ CurrOpnd ^ ConstOpnd" + // + if (CombineXorOpnd(I, CurrOpnd, PrevOpnd, ConstOpnd, CV)) { + // Remove previous operand + PrevOpnd->Invalidate(); + if (CV) { + *CurrOpnd = XorOpnd(CV); + PrevOpnd = CurrOpnd; + } else { + CurrOpnd->Invalidate(); + PrevOpnd = 0; + } + Changed = true; + } + } + + // Step 4: Reassemble the Ops + if (Changed) { + Ops.clear(); + for (unsigned int i = 0, e = Opnds.size(); i < e; i++) { + XorOpnd &O = Opnds[i]; + if (O.isInvalid()) + continue; + ValueEntry VE(getRank(O.getValue()), O.getValue()); + Ops.push_back(VE); + } + if (ConstOpnd != 0) { + Value *C = ConstantInt::get(Ty->getContext(), ConstOpnd); + ValueEntry VE(getRank(C), C); + Ops.push_back(VE); + } + int Sz = Ops.size(); + if (Sz == 1) + return Ops.back().Op; + else if (Sz == 0) { + assert(ConstOpnd == 0); + return ConstantInt::get(Ty->getContext(), ConstOpnd); + } + } + + return 0; +} + /// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This /// optimizes based on identities. If it can be reduced to a single Value, it /// is returned, otherwise the Ops list is mutated as necessary. @@ -1431,11 +1751,15 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, default: break; case Instruction::And: case Instruction::Or: - case Instruction::Xor: if (Value *Result = OptimizeAndOrXor(Opcode, Ops)) return Result; break; + case Instruction::Xor: + if (Value *Result = OptimizeXor(I, Ops)) + return Result; + break; + case Instruction::Add: if (Value *Result = OptimizeAdd(I, Ops)) return Result; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 810a553c74..f6bb365216 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -57,11 +57,15 @@ using namespace llvm; STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); -STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); -STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); +STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); +STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions"); +STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses found"); +STATISTIC(MaxPartitionUsesPerAlloca, "Maximum number of partition uses"); +STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); +STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); -STATISTIC(NumDeleted, "Number of instructions deleted"); -STATISTIC(NumVectorized, "Number of vectorized aggregates"); +STATISTIC(NumDeleted, "Number of instructions deleted"); +STATISTIC(NumVectorized, "Number of vectorized aggregates"); /// Hidden option to force the pass to not use DomTree and mem2reg, instead /// forming SSA values through the SSAUpdater infrastructure. @@ -69,112 +73,167 @@ static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); namespace { -/// \brief Alloca partitioning representation. -/// -/// This class represents a partitioning of an alloca into slices, and -/// information about the nature of uses of each slice of the alloca. The goal -/// is that this information is sufficient to decide if and how to split the -/// alloca apart and replace slices with scalars. It is also intended that this -/// structure can capture the relevant information needed both to decide about -/// and to enact these transformations. -class AllocaPartitioning { +/// \brief A custom IRBuilder inserter which prefixes all names if they are +/// preserved. +template <bool preserveNames = true> +class IRBuilderPrefixedInserter : + public IRBuilderDefaultInserter<preserveNames> { + std::string Prefix; + public: - /// \brief A common base class for representing a half-open byte range. - struct ByteRange { - /// \brief The beginning offset of the range. - uint64_t BeginOffset; + void SetNamePrefix(const Twine &P) { Prefix = P.str(); } - /// \brief The ending offset, not included in the range. - uint64_t EndOffset; +protected: + void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, + BasicBlock::iterator InsertPt) const { + IRBuilderDefaultInserter<preserveNames>::InsertHelper( + I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt); + } +}; - ByteRange() : BeginOffset(), EndOffset() {} - ByteRange(uint64_t BeginOffset, uint64_t EndOffset) - : BeginOffset(BeginOffset), EndOffset(EndOffset) {} +// Specialization for not preserving the name is trivial. +template <> +class IRBuilderPrefixedInserter<false> : + public IRBuilderDefaultInserter<false> { +public: + void SetNamePrefix(const Twine &P) {} +}; - /// \brief Support for ordering ranges. - /// - /// This provides an ordering over ranges such that start offsets are - /// always increasing, and within equal start offsets, the end offsets are - /// decreasing. Thus the spanning range comes first in a cluster with the - /// same start position. - bool operator<(const ByteRange &RHS) const { - if (BeginOffset < RHS.BeginOffset) return true; - if (BeginOffset > RHS.BeginOffset) return false; - if (EndOffset > RHS.EndOffset) return true; - return false; - } +/// \brief Provide a typedef for IRBuilder that drops names in release builds. +#ifndef NDEBUG +typedef llvm::IRBuilder<true, ConstantFolder, + IRBuilderPrefixedInserter<true> > IRBuilderTy; +#else +typedef llvm::IRBuilder<false, ConstantFolder, + IRBuilderPrefixedInserter<false> > IRBuilderTy; +#endif +} - /// \brief Support comparison with a single offset to allow binary searches. - friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { - return LHS.BeginOffset < RHSOffset; - } +namespace { +/// \brief A common base class for representing a half-open byte range. +struct ByteRange { + /// \brief The beginning offset of the range. + uint64_t BeginOffset; - friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, - const ByteRange &RHS) { - return LHSOffset < RHS.BeginOffset; - } + /// \brief The ending offset, not included in the range. + uint64_t EndOffset; - bool operator==(const ByteRange &RHS) const { - return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; - } - bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } - }; + ByteRange() : BeginOffset(), EndOffset() {} + ByteRange(uint64_t BeginOffset, uint64_t EndOffset) + : BeginOffset(BeginOffset), EndOffset(EndOffset) {} - /// \brief A partition of an alloca. + /// \brief Support for ordering ranges. /// - /// This structure represents a contiguous partition of the alloca. These are - /// formed by examining the uses of the alloca. During formation, they may - /// overlap but once an AllocaPartitioning is built, the Partitions within it - /// are all disjoint. - struct Partition : public ByteRange { - /// \brief Whether this partition is splittable into smaller partitions. - /// - /// We flag partitions as splittable when they are formed entirely due to - /// accesses by trivially splittable operations such as memset and memcpy. - bool IsSplittable; + /// This provides an ordering over ranges such that start offsets are + /// always increasing, and within equal start offsets, the end offsets are + /// decreasing. Thus the spanning range comes first in a cluster with the + /// same start position. + bool operator<(const ByteRange &RHS) const { + if (BeginOffset < RHS.BeginOffset) return true; + if (BeginOffset > RHS.BeginOffset) return false; + if (EndOffset > RHS.EndOffset) return true; + return false; + } - /// \brief Test whether a partition has been marked as dead. - bool isDead() const { - if (BeginOffset == UINT64_MAX) { - assert(EndOffset == UINT64_MAX); - return true; - } - return false; - } + /// \brief Support comparison with a single offset to allow binary searches. + friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { + return LHS.BeginOffset < RHSOffset; + } + + friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, + const ByteRange &RHS) { + return LHSOffset < RHS.BeginOffset; + } - /// \brief Kill a partition. - /// This is accomplished by setting both its beginning and end offset to - /// the maximum possible value. - void kill() { - assert(!isDead() && "He's Dead, Jim!"); - BeginOffset = EndOffset = UINT64_MAX; + bool operator==(const ByteRange &RHS) const { + return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; + } + bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } +}; + +/// \brief A partition of an alloca. +/// +/// This structure represents a contiguous partition of the alloca. These are +/// formed by examining the uses of the alloca. During formation, they may +/// overlap but once an AllocaPartitioning is built, the Partitions within it +/// are all disjoint. +struct Partition : public ByteRange { + /// \brief Whether this partition is splittable into smaller partitions. + /// + /// We flag partitions as splittable when they are formed entirely due to + /// accesses by trivially splittable operations such as memset and memcpy. + bool IsSplittable; + + /// \brief Test whether a partition has been marked as dead. + bool isDead() const { + if (BeginOffset == UINT64_MAX) { + assert(EndOffset == UINT64_MAX); + return true; } + return false; + } - Partition() : ByteRange(), IsSplittable() {} - Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) - : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} - }; + /// \brief Kill a partition. + /// This is accomplished by setting both its beginning and end offset to + /// the maximum possible value. + void kill() { + assert(!isDead() && "He's Dead, Jim!"); + BeginOffset = EndOffset = UINT64_MAX; + } + + Partition() : ByteRange(), IsSplittable() {} + Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) + : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} +}; + +/// \brief A particular use of a partition of the alloca. +/// +/// This structure is used to associate uses of a partition with it. They +/// mark the range of bytes which are referenced by a particular instruction, +/// and includes a handle to the user itself and the pointer value in use. +/// The bounds of these uses are determined by intersecting the bounds of the +/// memory use itself with a particular partition. As a consequence there is +/// intentionally overlap between various uses of the same partition. +class PartitionUse : public ByteRange { + /// \brief Combined storage for both the Use* and split state. + PointerIntPair<Use*, 1, bool> UsePtrAndIsSplit; + +public: + PartitionUse() : ByteRange(), UsePtrAndIsSplit() {} + PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U, + bool IsSplit) + : ByteRange(BeginOffset, EndOffset), UsePtrAndIsSplit(U, IsSplit) {} - /// \brief A particular use of a partition of the alloca. + /// \brief The use in question. Provides access to both user and used value. /// - /// This structure is used to associate uses of a partition with it. They - /// mark the range of bytes which are referenced by a particular instruction, - /// and includes a handle to the user itself and the pointer value in use. - /// The bounds of these uses are determined by intersecting the bounds of the - /// memory use itself with a particular partition. As a consequence there is - /// intentionally overlap between various uses of the same partition. - struct PartitionUse : public ByteRange { - /// \brief The use in question. Provides access to both user and used value. - /// - /// Note that this may be null if the partition use is *dead*, that is, it - /// should be ignored. - Use *U; + /// Note that this may be null if the partition use is *dead*, that is, it + /// should be ignored. + Use *getUse() const { return UsePtrAndIsSplit.getPointer(); } - PartitionUse() : ByteRange(), U() {} - PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U) - : ByteRange(BeginOffset, EndOffset), U(U) {} - }; + /// \brief Set the use for this partition use range. + void setUse(Use *U) { UsePtrAndIsSplit.setPointer(U); } + /// \brief Whether this use is split across multiple partitions. + bool isSplit() const { return UsePtrAndIsSplit.getInt(); } +}; +} + +namespace llvm { +template <> struct isPodLike<Partition> : llvm::true_type {}; +template <> struct isPodLike<PartitionUse> : llvm::true_type {}; +} + +namespace { +/// \brief Alloca partitioning representation. +/// +/// This class represents a partitioning of an alloca into slices, and +/// information about the nature of uses of each slice of the alloca. The goal +/// is that this information is sufficient to decide if and how to split the +/// alloca apart and replace slices with scalars. It is also intended that this +/// structure can capture the relevant information needed both to decide about +/// and to enact these transformations. +class AllocaPartitioning { +public: /// \brief Construct a partitioning of a particular alloca. /// /// Construction does most of the work for partitioning the alloca. This @@ -456,10 +515,10 @@ private: // Clamp the end offset to the end of the allocation. Note that this is // formulated to handle even the case where "BeginOffset + Size" overflows. - // NOTE! This may appear superficially to be something we could ignore - // entirely, but that is not so! There may be PHI-node uses where some - // instructions are dead but not others. We can't completely ignore the - // PHI node, and so have to record at least the information here. + // This may appear superficially to be something we could ignore entirely, + // but that is not so! There may be widened loads or PHI-node uses where + // some instructions are dead but not others. We can't completely ignore + // them, and so have to record at least the information here. assert(AllocSize >= BeginOffset); // Established above. if (Size > AllocSize - BeginOffset) { DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset @@ -474,33 +533,17 @@ private: } void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset, - bool IsVolatile) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. We also try to handle cases which might run the - // risk of overflow. - // FIXME: We should instead consider the pointer to have escaped if this - // function is being instrumented for addressing bugs or race conditions. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) { - DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte " - << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset - << " which extends past the end of the " << AllocSize - << " byte alloca:\n" - << " alloca: " << P.AI << "\n" - << " use: " << I << "\n"); - return; - } - + uint64_t Size, bool IsVolatile) { // We allow splitting of loads and stores where the type is an integer type - // and which cover the entire alloca. Such integer loads and stores - // often require decomposition into fine grained loads and stores. - bool IsSplittable = false; - if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) - IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8; + // and cover the entire alloca. This prevents us from splitting over + // eagerly. + // FIXME: In the great blue eventually, we should eagerly split all integer + // loads and stores, and then have a separate step that merges adjacent + // alloca partitions into a single partition suitable for integer widening. + // Or we should skip the merge step and rely on GVN and other passes to + // merge adjacent loads and stores that survive mem2reg. + bool IsSplittable = + Ty->isIntegerTy() && !IsVolatile && Offset == 0 && Size >= AllocSize; insertUse(I, Offset, Size, IsSplittable); } @@ -512,7 +555,8 @@ private: if (!IsOffsetKnown) return PI.setAborted(&LI); - return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile()); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile()); } void visitStoreInst(StoreInst &SI) { @@ -522,9 +566,28 @@ private: if (!IsOffsetKnown) return PI.setAborted(&SI); + uint64_t Size = DL.getTypeStoreSize(ValOp->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. We also try to handle cases which might run the + // risk of overflow. + // FIXME: We should instead consider the pointer to have escaped if this + // function is being instrumented for addressing bugs or race conditions. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) { + DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset + << " which extends past the end of the " << AllocSize + << " byte alloca:\n" + << " alloca: " << P.AI << "\n" + << " use: " << SI << "\n"); + return; + } + assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); - handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile()); + handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); } @@ -795,13 +858,14 @@ private: EndOffset = AllocSize; // NB: This only works if we have zero overlapping partitions. - iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); - if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) - B = llvm::prior(B); - for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; - ++I) { + iterator I = std::lower_bound(P.begin(), P.end(), BeginOffset); + if (I != P.begin() && llvm::prior(I)->EndOffset > BeginOffset) + I = llvm::prior(I); + iterator E = P.end(); + bool IsSplit = llvm::next(I) != E && llvm::next(I)->BeginOffset < EndOffset; + for (; I != E && I->BeginOffset < EndOffset; ++I) { PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), - std::min(I->EndOffset, EndOffset), U); + std::min(I->EndOffset, EndOffset), U, IsSplit); P.use_push_back(I, NewPU); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[U] @@ -809,20 +873,6 @@ private: } } - void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) { - uint64_t Size = DL.getTypeStoreSize(Ty); - - // If this memory access can be shown to *statically* extend outside the - // bounds of of the allocation, it's behavior is undefined, so simply - // ignore it. Note that this is more strict than the generic clamping - // behavior of insertUse. - if (Offset.isNegative() || Size > AllocSize || - Offset.ugt(AllocSize - Size)) - return markAsDead(I); - - insertUse(I, Offset, Size); - } - void visitBitCastInst(BitCastInst &BC) { if (BC.use_empty()) return markAsDead(BC); @@ -839,12 +889,23 @@ private: void visitLoadInst(LoadInst &LI) { assert(IsOffsetKnown); - handleLoadOrStore(LI.getType(), LI, Offset); + uint64_t Size = DL.getTypeStoreSize(LI.getType()); + insertUse(LI, Offset, Size); } void visitStoreInst(StoreInst &SI) { assert(IsOffsetKnown); - handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); + uint64_t Size = DL.getTypeStoreSize(SI.getOperand(0)->getType()); + + // If this memory access can be shown to *statically* extend outside the + // bounds of of the allocation, it's behavior is undefined, so simply + // ignore it. Note that this is more strict than the generic clamping + // behavior of insertUse. + if (Offset.isNegative() || Size > AllocSize || + Offset.ugt(AllocSize - Size)) + return markAsDead(SI); + + insertUse(SI, Offset, Size); } void visitMemSetInst(MemSetInst &II) { @@ -868,7 +929,7 @@ private: uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - Offset.getLimitedValue(); - MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; + const MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd && Offsets.DestBegin == Offsets.SourceBegin) return markAsDead(II); // Skip identity transfers without side-effects. @@ -1077,6 +1138,10 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) splitAndMergePartitions(); } + // Record how many partitions we end up with. + NumAllocaPartitions += Partitions.size(); + MaxPartitionsPerAlloca = std::max<unsigned>(Partitions.size(), MaxPartitionsPerAlloca); + // Now build up the user lists for each of these disjoint partitions by // re-walking the recursive users of the alloca. Uses.resize(Partitions.size()); @@ -1084,22 +1149,31 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI) PtrI = UB.visitPtr(AI); assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!"); assert(!PtrI.isAborted() && "Early aborted the visit of the pointer."); + + unsigned NumUses = 0; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS) + for (unsigned Idx = 0, Size = Uses.size(); Idx != Size; ++Idx) + NumUses += Uses[Idx].size(); +#endif + NumAllocaPartitionUses += NumUses; + MaxPartitionUsesPerAlloca = std::max<unsigned>(NumUses, MaxPartitionUsesPerAlloca); } Type *AllocaPartitioning::getCommonType(iterator I) const { Type *Ty = 0; for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + Use *U = UI->getUse(); + if (!U) continue; // Skip dead uses. - if (isa<IntrinsicInst>(*UI->U->getUser())) + if (isa<IntrinsicInst>(*U->getUser())) continue; if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) UserTy = LI->getType(); - else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) + else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) UserTy = SI->getValueOperand()->getType(); else return 0; // Bail if we have weird uses. @@ -1139,11 +1213,12 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { - if (!UI->U) + if (!UI->getUse()) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " - << "used by: " << *UI->U->getUser() << "\n"; - if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { + << "used by: " << *UI->getUse()->getUser() << "\n"; + if (MemTransferInst *II = + dyn_cast<MemTransferInst>(UI->getUse()->getUser())) { const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); bool IsDest; if (!MTO.IsSplittable) @@ -1243,12 +1318,12 @@ public: // may be zapped by an optimization pass in future. if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) Arg = dyn_cast<Argument>(ZExt->getOperand(0)); - if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) Arg = dyn_cast<Argument>(SExt->getOperand(0)); if (!Arg) - Arg = SI->getOperand(0); + Arg = SI->getValueOperand(); } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - Arg = LI->getOperand(0); + Arg = LI->getPointerOperand(); } else { continue; } @@ -1374,11 +1449,11 @@ public: // may be grown during speculation. However, we never need to re-visit the // new uses, and so we can use the initial size bound. for (unsigned Idx = 0, Size = P.use_size(PI); Idx != Size; ++Idx) { - const AllocaPartitioning::PartitionUse &PU = P.getUse(PI, Idx); - if (!PU.U) + const PartitionUse &PU = P.getUse(PI, Idx); + if (!PU.getUse()) continue; // Skip dead use. - visit(cast<Instruction>(PU.U->getUser())); + visit(cast<Instruction>(PU.getUse()->getUser())); } } @@ -1472,7 +1547,7 @@ private: assert(!Loads.empty()); Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); - IRBuilder<> PHIBuilder(&PN); + IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -1495,7 +1570,7 @@ private: TerminatorInst *TI = Pred->getTerminator(); Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); Value *InVal = PN.getIncomingValue(Idx); - IRBuilder<> PredBuilder(TI); + IRBuilderTy PredBuilder(TI); LoadInst *Load = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + @@ -1522,8 +1597,8 @@ private: // inside the load. AllocaPartitioning::use_iterator UI = P.findPartitionUseForPHIOrSelectOperand(InUse); - assert(isa<PHINode>(*UI->U->getUser())); - UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); + assert(isa<PHINode>(*UI->getUse()->getUser())); + UI->setUse(&Load->getOperandUse(Load->getPointerOperandIndex())); } DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); } @@ -1576,10 +1651,10 @@ private: if (!isSafeSelectToSpeculate(SI, Loads)) return; - IRBuilder<> IRB(&SI); + IRBuilderTy IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; - AllocaPartitioning::PartitionUse PUs[2]; + PartitionUse PUs[2]; for (unsigned i = 0, e = 2; i != e; ++i) { PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); if (PIs[i] != P.end()) { @@ -1590,7 +1665,7 @@ private: PUs[i] = *UI; // Clear out the use here so that the offsets into the use list remain // stable but this use is ignored when rewriting. - UI->U = 0; + UI->setUse(0); } } @@ -1622,8 +1697,8 @@ private: for (unsigned i = 0, e = 2; i != e; ++i) { if (PIs[i] != P.end()) { Use *LoadUse = &Loads[i]->getOperandUse(0); - assert(PUs[i].U->get() == LoadUse->get()); - PUs[i].U = LoadUse; + assert(PUs[i].getUse()->get() == LoadUse->get()); + PUs[i].setUse(LoadUse); P.use_push_back(PIs[i], PUs[i]); } } @@ -1640,9 +1715,8 @@ private: /// /// This will return the BasePtr if that is valid, or build a new GEP /// instruction using the IRBuilder if GEP-ing is needed. -static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { +static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr, + SmallVectorImpl<Value *> &Indices) { if (Indices.empty()) return BasePtr; @@ -1651,7 +1725,7 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) return BasePtr; - return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); + return IRB.CreateInBoundsGEP(BasePtr, Indices, "idx"); } /// \brief Get a natural GEP off of the BasePtr walking through Ty toward @@ -1663,12 +1737,11 @@ static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, /// TargetTy. If we can't find one with the same type, we at least try to use /// one with the same size. If none of that works, we just produce the GEP as /// indicated by Indices to have the correct offset. -static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &TD, Value *BasePtr, Type *Ty, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Ty == TargetTy) - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); // See if we can descend into a struct and locate a field with the correct // type. @@ -1695,20 +1768,19 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD, if (ElementTy != TargetTy) Indices.erase(Indices.end() - NumLayers, Indices.end()); - return buildGEP(IRB, BasePtr, Indices, Prefix); + return buildGEP(IRB, BasePtr, Indices); } /// \brief Recursively compute indices for a natural GEP. /// /// This is the recursive step for getNaturalGEPWithOffset that walks down the /// element types adding appropriate indices for the GEP. -static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { if (Offset == 0) - return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); + return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices); // We can't recurse through pointer types. if (Ty->isPointerTy()) @@ -1728,7 +1800,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), - Offset, TargetTy, Indices, Prefix); + Offset, TargetTy, Indices); } if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { @@ -1741,7 +1813,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } StructType *STy = dyn_cast<StructType>(Ty); @@ -1760,7 +1832,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Get a natural GEP from a base pointer to a particular offset and @@ -1773,10 +1845,9 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, /// Indices, and setting Ty to the result subtype. /// /// If no natural GEP can be constructed, this function returns null. -static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, +static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &TD, Value *Ptr, APInt Offset, Type *TargetTy, - SmallVectorImpl<Value *> &Indices, - const Twine &Prefix) { + SmallVectorImpl<Value *> &Indices) { PointerType *Ty = cast<PointerType>(Ptr->getType()); // Don't consider any GEPs through an i8* as natural unless the TargetTy is @@ -1795,7 +1866,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, - Indices, Prefix); + Indices); } /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the @@ -1813,9 +1884,8 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. -static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, - Value *Ptr, APInt Offset, Type *PointerTy, - const Twine &Prefix) { +static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &TD, + Value *Ptr, APInt Offset, Type *PointerTy) { // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet<Value *, 4> Visited; @@ -1849,7 +1919,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, // See if we can perform a natural GEP here. Indices.clear(); if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy, - Indices, Prefix)) { + Indices)) { if (P->getType() == PointerTy) { // Zap any offset pointer that we ended up computing in previous rounds. if (OffsetPtr && OffsetPtr->use_empty()) @@ -1884,19 +1954,19 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, if (!OffsetPtr) { if (!Int8Ptr) { Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(), - Prefix + ".raw_cast"); + "raw_cast"); Int8PtrOffset = Offset; } OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr : IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset), - Prefix + ".raw_idx"); + "raw_idx"); } Ptr = OffsetPtr; // On the off chance we were targeting i8*, guard the bitcast here. if (Ptr->getType() != PointerTy) - Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast"); + Ptr = IRB.CreateBitCast(Ptr, PointerTy, "cast"); return Ptr; } @@ -1910,6 +1980,10 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { if (OldTy == NewTy) return true; + if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy)) + if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy)) + if (NewITy->getBitWidth() >= OldITy->getBitWidth()) + return true; if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy)) return false; if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType()) @@ -1932,12 +2006,16 @@ static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) { /// This will try various different casting techniques, such as bitcasts, /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test /// two types for viability with this routine. -static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *Ty) { assert(canConvertValue(DL, V->getType(), Ty) && "Value not convertable to type"); if (V->getType() == Ty) return V; + if (IntegerType *OldITy = dyn_cast<IntegerType>(V->getType())) + if (IntegerType *NewITy = dyn_cast<IntegerType>(Ty)) + if (NewITy->getBitWidth() > OldITy->getBitWidth()) + return IRB.CreateZExt(V, NewITy); if (V->getType()->isIntegerTy() && Ty->isPointerTy()) return IRB.CreateIntToPtr(V, Ty); if (V->getType()->isPointerTy() && Ty->isIntegerTy()) @@ -1976,7 +2054,8 @@ static bool isVectorPromotionViable(const DataLayout &TD, ElementSize /= 8; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset; @@ -1996,24 +2075,24 @@ static bool isVectorPromotionViable(const DataLayout &TD, = (NumElements == 1) ? Ty->getElementType() : VectorType::get(Ty->getElementType(), NumElements); - if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile()) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) { + } else if (U->get()->getType()->getPointerElementType()->isStructTy()) { // Disable vector promotion when there are loads or stores of an FCA. return false; - } else if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (!canConvertValue(TD, PartitionTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { if (SI->isVolatile()) return false; if (!canConvertValue(TD, SI->getValueOperand()->getType(), PartitionTy)) @@ -2062,7 +2141,8 @@ static bool isIntegerWideningViable(const DataLayout &TD, // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { - if (!I->U) + Use *U = I->getUse(); + if (!U) continue; // Skip dead use. uint64_t RelBegin = I->BeginOffset - AllocBeginOffset; @@ -2073,7 +2153,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelEnd > Size) return false; - if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) { if (LI->isVolatile()) return false; if (RelBegin == 0 && RelEnd == Size) @@ -2088,7 +2168,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, AllocaTy, LI->getType())) return false; - } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) { + } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) { Type *ValueTy = SI->getValueOperand()->getType(); if (SI->isVolatile()) return false; @@ -2104,16 +2184,16 @@ static bool isIntegerWideningViable(const DataLayout &TD, if (RelBegin != 0 || RelEnd != Size || !canConvertValue(TD, ValueTy, AllocaTy)) return false; - } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { + } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) { if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; - if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO = P.getMemTransferOffsets(*MTI); if (!MTO.IsSplittable) return false; } - } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) { + } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) { if (II->getIntrinsicID() != Intrinsic::lifetime_start && II->getIntrinsicID() != Intrinsic::lifetime_end) return false; @@ -2124,7 +2204,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, return WholeAllocaOp; } -static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, +static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name) { DEBUG(dbgs() << " start: " << *V << "\n"); @@ -2147,7 +2227,7 @@ static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V, return V; } -static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, +static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name) { IntegerType *IntTy = cast<IntegerType>(Old->getType()); IntegerType *Ty = cast<IntegerType>(V->getType()); @@ -2178,7 +2258,7 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } -static Value *extractVector(IRBuilder<> &IRB, Value *V, +static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(V->getType()); @@ -2206,7 +2286,7 @@ static Value *extractVector(IRBuilder<> &IRB, Value *V, return V; } -static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, +static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name) { VectorType *VecTy = cast<VectorType>(Old->getType()); assert(VecTy && "Can only insert a vector into a vector"); @@ -2296,11 +2376,13 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // The offset of the partition user currently being rewritten. uint64_t BeginOffset, EndOffset; + bool IsSplit; Use *OldUse; Instruction *OldPtr; - // The name prefix to use when rewriting instructions for this alloca. - std::string NamePrefix; + // Utility IR builder, whose name prefix is setup for each visited use, and + // the insertion point is set to point to the user. + IRBuilderTy IRB; public: AllocaPartitionRewriter(const DataLayout &TD, AllocaPartitioning &P, @@ -2313,7 +2395,8 @@ public: NewAllocaEndOffset(NewEndOffset), NewAllocaTy(NewAI.getAllocatedType()), VecTy(), ElementTy(), ElementSize(), IntTy(), - BeginOffset(), EndOffset() { + BeginOffset(), EndOffset(), IsSplit(), OldUse(), OldPtr(), + IRB(NewAI.getContext(), ConstantFolder()) { } /// \brief Visit the users of the alloca partition and rewrite them. @@ -2335,14 +2418,21 @@ public: } bool CanSROA = true; for (; I != E; ++I) { - if (!I->U) + if (!I->getUse()) continue; // Skip dead uses. BeginOffset = I->BeginOffset; EndOffset = I->EndOffset; - OldUse = I->U; - OldPtr = cast<Instruction>(I->U->get()); - NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str(); - CanSROA &= visit(cast<Instruction>(I->U->getUser())); + IsSplit = I->isSplit(); + OldUse = I->getUse(); + OldPtr = cast<Instruction>(OldUse->get()); + + Instruction *OldUserI = cast<Instruction>(OldUse->getUser()); + IRB.SetInsertPoint(OldUserI); + IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc()); + IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); + + CanSROA &= visit(cast<Instruction>(OldUse->getUser())); } if (VecTy) { assert(CanSROA); @@ -2364,14 +2454,10 @@ private: llvm_unreachable("No rewrite rule for this instruction!"); } - Twine getName(const Twine &Suffix) { - return NamePrefix + Suffix; - } - - Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { + Value *getAdjustedAllocaPtr(IRBuilderTy &IRB, Type *PointerTy) { assert(BeginOffset >= NewAllocaBeginOffset); APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); - return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); + return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy); } /// \brief Compute suitable alignment to access an offset into the new alloca. @@ -2421,27 +2507,27 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { + Value *rewriteVectorizedLoadInst() { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); + "load"); + return extractVector(IRB, V, BeginIndex, EndIndex, "vec"); } - Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { + Value *rewriteIntegerLoad(LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); V = convertValue(TD, IRB, V, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; if (Offset > 0 || EndOffset < NewAllocaEndOffset) V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset, - getName(".extract")); + "extract"); return V; } @@ -2451,56 +2537,37 @@ private: assert(OldOp == OldPtr); uint64_t Size = EndOffset - BeginOffset; - bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); - // If this memory access can be shown to *statically* extend outside the - // bounds of the original allocation it's behavior is undefined. Rather - // than trying to transform it, just replace it with undef. - // FIXME: We should do something more clever for functions being - // instrumented by asan. - // FIXME: Eventually, once ASan and friends can flush out bugs here, this - // should be transformed to a load of null making it unreachable. - uint64_t OldAllocSize = TD.getTypeAllocSize(OldAI.getAllocatedType()); - if (TD.getTypeStoreSize(LI.getType()) > OldAllocSize) { - LI.replaceAllUsesWith(UndefValue::get(LI.getType())); - Pass.DeadInsts.insert(&LI); - deleteIfTriviallyDead(OldOp); - DEBUG(dbgs() << " to: undef!!\n"); - return true; - } - - IRBuilder<> IRB(&LI); - Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) - : LI.getType(); + Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), Size * 8) + : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB); + V = rewriteVectorizedLoadInst(); } else if (IntTy && LI.getType()->isIntegerTy()) { - V = rewriteIntegerLoad(IRB, LI); + V = rewriteIntegerLoad(LI); } else if (BeginOffset == NewAllocaBeginOffset && canConvertValue(TD, NewAllocaTy, LI.getType())) { V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); } else { Type *LTy = TargetTy->getPointerTo(); V = IRB.CreateAlignedLoad(getAdjustedAllocaPtr(IRB, LTy), getPartitionTypeAlign(TargetTy), - LI.isVolatile(), getName(".load")); + LI.isVolatile(), "load"); IsPtrAdjusted = true; } V = convertValue(TD, IRB, V, TargetTy); - if (IsSplitIntLoad) { + if (IsSplit) { assert(!LI.isVolatile()); assert(LI.getType()->isIntegerTy() && "Only integer type loads and stores are split"); + assert(Size < TD.getTypeStoreSize(LI.getType()) && + "Split load isn't smaller than original load"); assert(LI.getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); - assert(LI.getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide loads can be split and recomposed"); // Move the insertion point just past the load so that we can refer to it. IRB.SetInsertPoint(llvm::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the @@ -2510,7 +2577,7 @@ private: Value *Placeholder = new LoadInst(UndefValue::get(LI.getType()->getPointerTo())); V = insertInteger(TD, IRB, Placeholder, V, BeginOffset, - getName(".insert")); + "insert"); LI.replaceAllUsesWith(V); Placeholder->replaceAllUsesWith(&LI); delete Placeholder; @@ -2524,7 +2591,7 @@ private: return !LI.isVolatile() && !IsPtrAdjusted; } - bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, Value *V, + bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); @@ -2539,8 +2606,8 @@ private: // Mix in the existing elements. Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + "load"); + V = insertVector(IRB, Old, V, BeginIndex, "vec"); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2550,17 +2617,17 @@ private: return true; } - bool rewriteIntegerStore(IRBuilder<> &IRB, Value *V, StoreInst &SI) { + bool rewriteIntegerStore(Value *V, StoreInst &SI) { assert(IntTy && "We cannot extract an integer from the alloca"); assert(!SI.isVolatile()); if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset, - getName(".insert")); + "insert"); } V = convertValue(TD, IRB, V, NewAllocaTy); StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); @@ -2574,7 +2641,6 @@ private: DEBUG(dbgs() << " original: " << SI << "\n"); Value *OldOp = SI.getOperand(1); assert(OldOp == OldPtr); - IRBuilder<> IRB(&SI); Value *V = SI.getValueOperand(); @@ -2587,23 +2653,21 @@ private: uint64_t Size = EndOffset - BeginOffset; if (Size < TD.getTypeStoreSize(V->getType())) { assert(!SI.isVolatile()); + assert(IsSplit && "A seemingly split store isn't splittable"); assert(V->getType()->isIntegerTy() && "Only integer type loads and stores are split"); assert(V->getType()->getIntegerBitWidth() == TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); - assert(V->getType()->getIntegerBitWidth() == - TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && - "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, - getName(".extract")); + "extract"); } if (VecTy) - return rewriteVectorizedStoreInst(IRB, V, SI, OldOp); + return rewriteVectorizedStoreInst(V, SI, OldOp); if (IntTy && V->getType()->isIntegerTy()) - return rewriteIntegerStore(IRB, V, SI); + return rewriteIntegerStore(V, SI); StoreInst *NewSI; if (BeginOffset == NewAllocaBeginOffset && @@ -2634,7 +2698,7 @@ private: /// /// \param V The i8 value to splat. /// \param Size The number of bytes in the output (assuming i8 is one byte) - Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + Value *getIntegerSplat(Value *V, unsigned Size) { assert(Size > 0 && "Expected a positive number of bytes."); IntegerType *VTy = cast<IntegerType>(V->getType()); assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); @@ -2642,26 +2706,25 @@ private: return V; Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, "zext"), ConstantExpr::getUDiv( Constant::getAllOnesValue(SplatIntTy), ConstantExpr::getZExt( Constant::getAllOnesValue(V->getType()), SplatIntTy)), - getName(".isplat")); + "isplat"); return V; } /// \brief Compute a vector splat for a given element value. - Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { - V = IRB.CreateVectorSplat(NumElements, V, NamePrefix); + Value *getVectorSplat(Value *V, unsigned NumElements) { + V = IRB.CreateVectorSplat(NumElements, V, "vsplat"); DEBUG(dbgs() << " splat: " << *V << "\n"); return V; } bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawDest() == OldPtr); // If the memset has a variable size, it cannot be split, just adjust the @@ -2718,31 +2781,31 @@ private: unsigned NumElements = EndIndex - BeginIndex; assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - Value *Splat = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ElementTy)/8); + Value *Splat = + getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ElementTy) / 8); Splat = convertValue(TD, IRB, Splat, ElementTy); if (NumElements > 1) - Splat = getVectorSplat(IRB, Splat, NumElements); + Splat = getVectorSplat(Splat, NumElements); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + "oldload"); + V = insertVector(IRB, Old, Splat, BeginIndex, "vec"); } else if (IntTy) { // If this is a memset on an alloca where we can widen stores, insert the // set integer. assert(!II.isVolatile()); uint64_t Size = EndOffset - BeginOffset; - V = getIntegerSplat(IRB, II.getValue(), Size); + V = getIntegerSplat(II.getValue(), Size); if (IntTy && (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaBeginOffset)) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + V = insertInteger(TD, IRB, Old, V, Offset, "insert"); } else { assert(V->getType() == IntTy && "Wrong type for an alloca wide integer!"); @@ -2753,10 +2816,9 @@ private: assert(BeginOffset == NewAllocaBeginOffset); assert(EndOffset == NewAllocaEndOffset); - V = getIntegerSplat(IRB, II.getValue(), - TD.getTypeSizeInBits(ScalarTy)/8); + V = getIntegerSplat(II.getValue(), TD.getTypeSizeInBits(ScalarTy) / 8); if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) - V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); + V = getVectorSplat(V, AllocaVecTy->getNumElements()); V = convertValue(TD, IRB, V, AllocaTy); } @@ -2773,7 +2835,6 @@ private: // them into two categories: split intrinsics and unsplit intrinsics. DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); bool IsDest = II.getRawDest() == OldPtr; @@ -2857,8 +2918,7 @@ private: // Compute the other pointer, folding as much as possible to produce // a single, simple GEP in most cases. - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() @@ -2901,8 +2961,7 @@ private: OtherPtrTy = SubIntTy->getPointerTo(); } - Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); @@ -2910,31 +2969,31 @@ private: Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); + "load"); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + "load"); Src = convertValue(TD, IRB, Src, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract")); + Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, "extract"); } else { Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), - getName(".copyload")); + "copyload"); } if (VecTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); - Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + "oldload"); + Src = insertVector(IRB, Old, Src, BeginIndex, "vec"); } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".oldload")); + "oldload"); Old = convertValue(TD, IRB, Old, IntTy); assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert")); + Src = insertInteger(TD, IRB, Old, Src, Offset, "insert"); Src = convertValue(TD, IRB, Src, NewAllocaTy); } @@ -2949,7 +3008,6 @@ private: assert(II.getIntrinsicID() == Intrinsic::lifetime_start || II.getIntrinsicID() == Intrinsic::lifetime_end); DEBUG(dbgs() << " original: " << II << "\n"); - IRBuilder<> IRB(&II); assert(II.getArgOperand(1) == OldPtr); // Record this instruction for deletion. @@ -2977,7 +3035,9 @@ private: // as local as possible to the PHI. To do that, we re-use the location of // the old pointer, which necessarily must be in the right position to // dominate the PHI. - IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr)); + IRBuilderTy PtrBuilder(cast<Instruction>(OldPtr)); + PtrBuilder.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + + "."); Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType()); // Replace the operands which were using the old pointer. @@ -2990,7 +3050,6 @@ private: bool visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // Find the operand we need to rewrite here. bool IsTrueVal = SI.getTrueValue() == OldPtr; @@ -3065,7 +3124,7 @@ private: class OpSplitter { protected: /// The builder used to form new instructions. - IRBuilder<> IRB; + IRBuilderTy IRB; /// The indices which to be used with insert- or extractvalue to select the /// appropriate value within the aggregate. SmallVector<unsigned, 4> Indices; @@ -3277,12 +3336,13 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty, Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; - if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) + if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) { if (NumSkippedElements >= ArrTy->getNumElements()) return 0; - if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) + } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) { if (NumSkippedElements >= VecTy->getNumElements()) return 0; + } Offset -= NumSkippedElements * ElementSize; // First check if we need to recurse. @@ -3380,7 +3440,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, for (AllocaPartitioning::use_iterator UI = P.use_begin(PI), UE = P.use_end(PI); UI != UE && !IsLive; ++UI) - if (UI->U) + if (UI->getUse()) IsLive = true; if (!IsLive) return false; // No live uses left of this partition. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 916b37d4a8..3514e6c2aa 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -19,7 +19,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! @@ -35,7 +34,6 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" using namespace llvm; -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); //===----------------------------------------------------------------------===// // Optimizer Base Class @@ -91,8 +89,6 @@ namespace { TargetLibraryInfo *TLI; StringMap<LibCallOptimization*> Optimizations; - - bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification SimplifyLibCalls() : FunctionPass(ID) { @@ -104,14 +100,6 @@ namespace { void InitOptimizations(); bool runOnFunction(Function &F); - void setDoesNotAccessMemory(Function &F); - void setOnlyReadsMemory(Function &F); - void setDoesNotThrow(Function &F); - void setDoesNotCapture(Function &F, unsigned n); - void setDoesNotAlias(Function &F, unsigned n); - bool doInitialization(Module &M); - - void inferPrototypeAttributes(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetLibraryInfo>(); } @@ -208,697 +196,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { return Changed; } -// Utility methods for doInitialization. - -void SimplifyLibCalls::setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - Modified = true; - } -} -void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - Modified = true; - } -} - - -void SimplifyLibCalls::inferPrototypeAttributes(Function &F) { - FunctionType *FTy = F.getFunctionType(); - - StringRef Name = F.getName(); - switch (Name[0]) { - case 's': - if (Name == "strlen") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strchr" || - Name == "strrchr") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "strcpy" || - Name == "stpcpy" || - Name == "strcat" || - Name == "strtol" || - Name == "strtod" || - Name == "strtof" || - Name == "strtoul" || - Name == "strtoll" || - Name == "strtold" || - Name == "strncat" || - Name == "strncpy" || - Name == "stpncpy" || - Name == "strtoull") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strxfrm") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strcmp" || - Name == "strspn" || - Name == "strncmp" || - Name == "strcspn" || - Name == "strcoll" || - Name == "strcasecmp" || - Name == "strncasecmp") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strstr" || - Name == "strpbrk") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strtok" || - Name == "strtok_r") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "scanf" || - Name == "setbuf" || - Name == "setvbuf") { - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strdup" || - Name == "strndup") { - if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "stat" || - Name == "sscanf" || - Name == "sprintf" || - Name == "statvfs") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "snprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } else if (Name == "setitimer") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - } else if (Name == "system") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "system" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - case 'm': - if (Name == "malloc") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "memcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "memchr" || - Name == "memrchr") { - if (FTy->getNumParams() != 3) - return; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "modf" || - Name == "modff" || - Name == "modfl" || - Name == "memcpy" || - Name == "memccpy" || - Name == "memmove") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "memalign") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotAlias(F, 0); - } else if (Name == "mkdir" || - Name == "mktime") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'r': - if (Name == "realloc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "read") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "read" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } else if (Name == "rmdir" || - Name == "rewind" || - Name == "remove" || - Name == "realpath") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "rename" || - Name == "readlink") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'w': - if (Name == "write") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; "write" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } - break; - case 'b': - if (Name == "bcopy") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bzero") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'c': - if (Name == "calloc") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "chmod" || - Name == "chown" || - Name == "ctermid" || - Name == "clearerr" || - Name == "closedir") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'a': - if (Name == "atoi" || - Name == "atol" || - Name == "atof" || - Name == "atoll") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "access") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'f': - if (Name == "fopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "fdopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - } else if (Name == "feof" || - Name == "free" || - Name == "fseek" || - Name == "ftell" || - Name == "fgetc" || - Name == "fseeko" || - Name == "ftello" || - Name == "fileno" || - Name == "fflush" || - Name == "fclose" || - Name == "fsetpos" || - Name == "flockfile" || - Name == "funlockfile" || - Name == "ftrylockfile") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "ferror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - } else if (Name == "fputc" || - Name == "fstat" || - Name == "frexp" || - Name == "frexpf" || - Name == "frexpl" || - Name == "fstatvfs") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "fgets") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - } else if (Name == "fread" || - Name == "fwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - } else if (Name == "fputs" || - Name == "fscanf" || - Name == "fprintf" || - Name == "fgetpos") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'g': - if (Name == "getc" || - Name == "getlogin_r" || - Name == "getc_unlocked") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "getenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "gets" || - Name == "getchar") { - setDoesNotThrow(F); - } else if (Name == "getitimer") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "getpwnam") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'u': - if (Name == "ungetc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "uname" || - Name == "unlink" || - Name == "unsetenv") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "utime" || - Name == "utimes") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'p': - if (Name == "putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "puts" || - Name == "printf" || - Name == "perror") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "pread" || - Name == "pwrite") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return; - // May throw; these are valid pthread cancellation points. - setDoesNotCapture(F, 2); - } else if (Name == "putchar") { - setDoesNotThrow(F); - } else if (Name == "popen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "pclose") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'v': - if (Name == "vscanf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vsscanf" || - Name == "vfscanf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "valloc") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "vprintf") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vfprintf" || - Name == "vsprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "vsnprintf") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } - break; - case 'o': - if (Name == "open") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } else if (Name == "opendir") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } - break; - case 't': - if (Name == "tmpfile") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "times") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'h': - if (Name == "htonl" || - Name == "htons") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'n': - if (Name == "ntohl" || - Name == "ntohs") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'l': - if (Name == "lstat") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "lchown") { - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'q': - if (Name == "qsort") { - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - } - break; - case '_': - if (Name == "__strdup" || - Name == "__strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "__strtok_r") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "_IO_getc") { - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "_IO_putc") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } - break; - case 1: - if (Name == "\1__isoc99_scanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1stat64" || - Name == "\1lstat64" || - Name == "\1statvfs64" || - Name == "\1__isoc99_sscanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fopen64") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fseeko64" || - Name == "\1ftello64") { - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1tmpfile64") { - if (!FTy->getReturnType()->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "\1fstat64" || - Name == "\1fstatvfs64") { - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "\1open64") { - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - } -} - -/// doInitialization - Add attributes to well-known functions. -/// -bool SimplifyLibCalls::doInitialization(Module &M) { - Modified = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - Function &F = *I; - if (F.isDeclaration() && F.hasName()) - inferPrototypeAttributes(F); - } - return Modified; -} - // TODO: // Additional cases that we need to add to this file: // diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 0d2598a221..e9828d60cd 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -82,7 +82,8 @@ namespace { /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. - void forwardResume(ResumeInst *RI); + void forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads); /// addIncomingPHIValuesFor - Add incoming-PHI values to the unwind /// destination block for the given basic block, using the values for the @@ -140,8 +141,10 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// block. When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI) { +void InvokeInliningInfo::forwardResume(ResumeInst *RI, + SmallPtrSet<LandingPadInst*, 16> &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); + LandingPadInst *OuterLPad = getLandingPadInst(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); @@ -152,6 +155,16 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) { InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + for (SmallPtrSet<LandingPadInst*, 16>::iterator I = InlinedLPads.begin(), + E = InlinedLPads.end(); I != E; ++I) { + LandingPadInst *InlinedLPad = *I; + for (unsigned OuterIdx = 0, OuterNum = OuterLPad->getNumClauses(); + OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + } } /// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into @@ -229,19 +242,15 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to - // rewrite. If the code doesn't have calls or unwinds, we know there is - // nothing to rewrite. - if (!InlinedCodeInfo.ContainsCalls) { - // Now that everything is happy, we have one final detail. The PHI nodes in - // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the - // PHI node) now. - InvokeDest->removePredecessor(II->getParent()); - return; - } - + // rewrite. InvokeInliningInfo Invoke(II); - + + // Get all of the inlined landing pad instructions. + SmallPtrSet<LandingPadInst*, 16> InlinedLPads; + for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) + InlinedLPads.insert(II->getLandingPadInst()); + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ if (InlinedCodeInfo.ContainsCalls) if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) { @@ -250,13 +259,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, continue; } + // Forward any resumes that are remaining here. if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) - Invoke.forwardResume(RI); + Invoke.forwardResume(RI, InlinedLPads); } // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original - // invoke instruction. Eliminate these entries (which might even delete the + // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. InvokeDest->removePredecessor(II->getParent()); } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a54ee08b67..be80d34d96 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -985,22 +985,17 @@ bool llvm::removeUnreachableBlocks(Function &F) { if (Reachable.count(I)) continue; - // Remove the block as predecessor of all its reachable successors. - // Unreachable successors don't matter as they'll soon be removed, too. for (succ_iterator SI = succ_begin(I), SE = succ_end(I); SI != SE; ++SI) if (Reachable.count(*SI)) (*SI)->removePredecessor(I); + I->dropAllReferences(); + } - // Zap all instructions in this basic block. - while (!I->empty()) { - Instruction &Inst = I->back(); - if (!Inst.use_empty()) - Inst.replaceAllUsesWith(UndefValue::get(Inst.getType())); - I->getInstList().pop_back(); - } + for (Function::iterator I = llvm::next(F.begin()), E=F.end(); I != E;) + if (!Reachable.count(I)) + I = F.getBasicBlockList().erase(I); + else + ++I; - --I; - llvm::next(I)->eraseFromParent(); - } return true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 07dd453424..930d9c412f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll index 464b6ece0e..ba9d84cf3e 100644 --- a/test/Analysis/CostModel/ARM/cast.ll +++ b/test/Analysis/CostModel/ARM/cast.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios6.0.0" @@ -152,6 +152,395 @@ define i32 @casts() { ; CHECK: cost of 10 {{.*}} uitofp %r69 = uitofp i64 undef to double + ; CHECK: cost of 3 {{.*}} sext + %r70 = sext <8 x i8> undef to <8 x i32> + ; CHECK: cost of 6 {{.*}} sext + %r71 = sext <16 x i8> undef to <16 x i32> + ; CHECK: cost of 3 {{.*}} zext + %r72 = zext <8 x i8> undef to <8 x i32> + ; CHECK: cost of 6 {{.*}} zext + %r73 = zext <16 x i8> undef to <16 x i32> + + ; CHECK: cost of 7 {{.*}} sext + %rext_0 = sext <8 x i8> undef to <8 x i64> + ; CHECK: cost of 7 {{.*}} zext + %rext_1 = zext <8 x i8> undef to <8 x i64> + ; CHECK: cost of 6 {{.*}} sext + %rext_2 = sext <8 x i16> undef to <8 x i64> + ; CHECK: cost of 6 {{.*}} zext + %rext_3 = zext <8 x i16> undef to <8 x i64> + ; CHECK: cost of 3 {{.*}} sext + %rext_4 = sext <4 x i16> undef to <4 x i64> + ; CHECK: cost of 3 {{.*}} zext + %rext_5 = zext <4 x i16> undef to <4 x i64> + + ; Vector cast cost of instructions lowering the cast to the stack. + ; CHECK: cost of 19 {{.*}} trunc + %r74 = trunc <8 x i32> undef to <8 x i8> + ; CHECK: cost of 38 {{.*}} trunc + %r75 = trunc <16 x i32> undef to <16 x i8> + + ; Floating point truncation costs. + ; CHECK: cost of 1 {{.*}} fptrunc double + %r80 = fptrunc double undef to float + ; CHECK: cost of 2 {{.*}} fptrunc <2 x double + %r81 = fptrunc <2 x double> undef to <2 x float> + ; CHECK: cost of 4 {{.*}} fptrunc <4 x double + %r82 = fptrunc <4 x double> undef to <4 x float> + ; CHECK: cost of 8 {{.*}} fptrunc <8 x double + %r83 = fptrunc <8 x double> undef to <8 x float> + ; CHECK: cost of 16 {{.*}} fptrunc <16 x double + %r84 = fptrunc <16 x double> undef to <16 x float> + + ; Floating point extension costs. + ; CHECK: cost of 1 {{.*}} fpext float + %r85 = fpext float undef to double + ; CHECK: cost of 2 {{.*}} fpext <2 x float + %r86 = fpext <2 x float> undef to <2 x double> + ; CHECK: cost of 4 {{.*}} fpext <4 x float + %r87 = fpext <4 x float> undef to <4 x double> + ; CHECK: cost of 8 {{.*}} fpext <8 x float + %r88 = fpext <8 x float> undef to <8 x double> + ; CHECK: cost of 16 {{.*}} fpext <16 x float + %r89 = fpext <16 x float> undef to <16 x double> + + ;; Floating point to integer vector casts. + ; CHECK: cost of 1 {{.*}} fptoui + %r90 = fptoui <2 x float> undef to <2 x i1> + ; CHECK: cost of 1 {{.*}} fptosi + %r91 = fptosi <2 x float> undef to <2 x i1> + ; CHECK: cost of 1 {{.*}} fptoui + %r92 = fptoui <2 x float> undef to <2 x i8> + ; CHECK: cost of 1 {{.*}} fptosi + %r93 = fptosi <2 x float> undef to <2 x i8> + ; CHECK: cost of 1 {{.*}} fptoui + %r94 = fptoui <2 x float> undef to <2 x i16> + ; CHECK: cost of 1 {{.*}} fptosi + %r95 = fptosi <2 x float> undef to <2 x i16> + ; CHECK: cost of 1 {{.*}} fptoui + %r96 = fptoui <2 x float> undef to <2 x i32> + ; CHECK: cost of 1 {{.*}} fptosi + %r97 = fptosi <2 x float> undef to <2 x i32> + ; CHECK: cost of 24 {{.*}} fptoui + %r98 = fptoui <2 x float> undef to <2 x i64> + ; CHECK: cost of 24 {{.*}} fptosi + %r99 = fptosi <2 x float> undef to <2 x i64> + + ; CHECK: cost of 8 {{.*}} fptoui + %r100 = fptoui <2 x double> undef to <2 x i1> + ; CHECK: cost of 8 {{.*}} fptosi + %r101 = fptosi <2 x double> undef to <2 x i1> + ; CHECK: cost of 8 {{.*}} fptoui + %r102 = fptoui <2 x double> undef to <2 x i8> + ; CHECK: cost of 8 {{.*}} fptosi + %r103 = fptosi <2 x double> undef to <2 x i8> + ; CHECK: cost of 8 {{.*}} fptoui + %r104 = fptoui <2 x double> undef to <2 x i16> + ; CHECK: cost of 8 {{.*}} fptosi + %r105 = fptosi <2 x double> undef to <2 x i16> + ; CHECK: cost of 2 {{.*}} fptoui + %r106 = fptoui <2 x double> undef to <2 x i32> + ; CHECK: cost of 2 {{.*}} fptosi + %r107 = fptosi <2 x double> undef to <2 x i32> + ; CHECK: cost of 24 {{.*}} fptoui + %r108 = fptoui <2 x double> undef to <2 x i64> + ; CHECK: cost of 24 {{.*}} fptosi + %r109 = fptosi <2 x double> undef to <2 x i64> + + ; CHECK: cost of 16 {{.*}} fptoui + %r110 = fptoui <4 x float> undef to <4 x i1> + ; CHECK: cost of 16 {{.*}} fptosi + %r111 = fptosi <4 x float> undef to <4 x i1> + ; CHECK: cost of 3 {{.*}} fptoui + %r112 = fptoui <4 x float> undef to <4 x i8> + ; CHECK: cost of 3 {{.*}} fptosi + %r113 = fptosi <4 x float> undef to <4 x i8> + ; CHECK: cost of 2 {{.*}} fptoui + %r114 = fptoui <4 x float> undef to <4 x i16> + ; CHECK: cost of 2 {{.*}} fptosi + %r115 = fptosi <4 x float> undef to <4 x i16> + ; CHECK: cost of 1 {{.*}} fptoui + %r116 = fptoui <4 x float> undef to <4 x i32> + ; CHECK: cost of 1 {{.*}} fptosi + %r117 = fptosi <4 x float> undef to <4 x i32> + ; CHECK: cost of 48 {{.*}} fptoui + %r118 = fptoui <4 x float> undef to <4 x i64> + ; CHECK: cost of 48 {{.*}} fptosi + %r119 = fptosi <4 x float> undef to <4 x i64> + + ; CHECK: cost of 16 {{.*}} fptoui + %r120 = fptoui <4 x double> undef to <4 x i1> + ; CHECK: cost of 16 {{.*}} fptosi + %r121 = fptosi <4 x double> undef to <4 x i1> + ; CHECK: cost of 16 {{.*}} fptoui + %r122 = fptoui <4 x double> undef to <4 x i8> + ; CHECK: cost of 16 {{.*}} fptosi + %r123 = fptosi <4 x double> undef to <4 x i8> + ; CHECK: cost of 16 {{.*}} fptoui + %r124 = fptoui <4 x double> undef to <4 x i16> + ; CHECK: cost of 16 {{.*}} fptosi + %r125 = fptosi <4 x double> undef to <4 x i16> + ; CHECK: cost of 16 {{.*}} fptoui + %r126 = fptoui <4 x double> undef to <4 x i32> + ; CHECK: cost of 16 {{.*}} fptosi + %r127 = fptosi <4 x double> undef to <4 x i32> + ; CHECK: cost of 48 {{.*}} fptoui + %r128 = fptoui <4 x double> undef to <4 x i64> + ; CHECK: cost of 48 {{.*}} fptosi + %r129 = fptosi <4 x double> undef to <4 x i64> + + ; CHECK: cost of 32 {{.*}} fptoui + %r130 = fptoui <8 x float> undef to <8 x i1> + ; CHECK: cost of 32 {{.*}} fptosi + %r131 = fptosi <8 x float> undef to <8 x i1> + ; CHECK: cost of 32 {{.*}} fptoui + %r132 = fptoui <8 x float> undef to <8 x i8> + ; CHECK: cost of 32 {{.*}} fptosi + %r133 = fptosi <8 x float> undef to <8 x i8> + ; CHECK: cost of 4 {{.*}} fptoui + %r134 = fptoui <8 x float> undef to <8 x i16> + ; CHECK: cost of 4 {{.*}} fptosi + %r135 = fptosi <8 x float> undef to <8 x i16> + ; CHECK: cost of 2 {{.*}} fptoui + %r136 = fptoui <8 x float> undef to <8 x i32> + ; CHECK: cost of 2 {{.*}} fptosi + %r137 = fptosi <8 x float> undef to <8 x i32> + ; CHECK: cost of 96 {{.*}} fptoui + %r138 = fptoui <8 x float> undef to <8 x i64> + ; CHECK: cost of 96 {{.*}} fptosi + %r139 = fptosi <8 x float> undef to <8 x i64> + + ; CHECK: cost of 32 {{.*}} fptoui + %r140 = fptoui <8 x double> undef to <8 x i1> + ; CHECK: cost of 32 {{.*}} fptosi + %r141 = fptosi <8 x double> undef to <8 x i1> + ; CHECK: cost of 32 {{.*}} fptoui + %r142 = fptoui <8 x double> undef to <8 x i8> + ; CHECK: cost of 32 {{.*}} fptosi + %r143 = fptosi <8 x double> undef to <8 x i8> + ; CHECK: cost of 32 {{.*}} fptoui + %r144 = fptoui <8 x double> undef to <8 x i16> + ; CHECK: cost of 32 {{.*}} fptosi + %r145 = fptosi <8 x double> undef to <8 x i16> + ; CHECK: cost of 32 {{.*}} fptoui + %r146 = fptoui <8 x double> undef to <8 x i32> + ; CHECK: cost of 32 {{.*}} fptosi + %r147 = fptosi <8 x double> undef to <8 x i32> + ; CHECK: cost of 96 {{.*}} fptoui + %r148 = fptoui <8 x double> undef to <8 x i64> + ; CHECK: cost of 96 {{.*}} fptosi + %r149 = fptosi <8 x double> undef to <8 x i64> + + ; CHECK: cost of 64 {{.*}} fptoui + %r150 = fptoui <16 x float> undef to <16 x i1> + ; CHECK: cost of 64 {{.*}} fptosi + %r151 = fptosi <16 x float> undef to <16 x i1> + ; CHECK: cost of 64 {{.*}} fptoui + %r152 = fptoui <16 x float> undef to <16 x i8> + ; CHECK: cost of 64 {{.*}} fptosi + %r153 = fptosi <16 x float> undef to <16 x i8> + ; CHECK: cost of 8 {{.*}} fptoui + %r154 = fptoui <16 x float> undef to <16 x i16> + ; CHECK: cost of 8 {{.*}} fptosi + %r155 = fptosi <16 x float> undef to <16 x i16> + ; CHECK: cost of 4 {{.*}} fptoui + %r156 = fptoui <16 x float> undef to <16 x i32> + ; CHECK: cost of 4 {{.*}} fptosi + %r157 = fptosi <16 x float> undef to <16 x i32> + ; CHECK: cost of 192 {{.*}} fptoui + %r158 = fptoui <16 x float> undef to <16 x i64> + ; CHECK: cost of 192 {{.*}} fptosi + %r159 = fptosi <16 x float> undef to <16 x i64> + + ; CHECK: cost of 64 {{.*}} fptoui + %r160 = fptoui <16 x double> undef to <16 x i1> + ; CHECK: cost of 64 {{.*}} fptosi + %r161 = fptosi <16 x double> undef to <16 x i1> + ; CHECK: cost of 64 {{.*}} fptoui + %r162 = fptoui <16 x double> undef to <16 x i8> + ; CHECK: cost of 64 {{.*}} fptosi + %r163 = fptosi <16 x double> undef to <16 x i8> + ; CHECK: cost of 64 {{.*}} fptoui + %r164 = fptoui <16 x double> undef to <16 x i16> + ; CHECK: cost of 64 {{.*}} fptosi + %r165 = fptosi <16 x double> undef to <16 x i16> + ; CHECK: cost of 64 {{.*}} fptoui + %r166 = fptoui <16 x double> undef to <16 x i32> + ; CHECK: cost of 64 {{.*}} fptosi + %r167 = fptosi <16 x double> undef to <16 x i32> + ; CHECK: cost of 192 {{.*}} fptoui + %r168 = fptoui <16 x double> undef to <16 x i64> + ; CHECK: cost of 192 {{.*}} fptosi + %r169 = fptosi <16 x double> undef to <16 x i64> + + ; CHECK: cost of 8 {{.*}} uitofp + %r170 = uitofp <2 x i1> undef to <2 x float> + ; CHECK: cost of 8 {{.*}} sitofp + %r171 = sitofp <2 x i1> undef to <2 x float> + ; CHECK: cost of 3 {{.*}} uitofp + %r172 = uitofp <2 x i8> undef to <2 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %r173 = sitofp <2 x i8> undef to <2 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %r174 = uitofp <2 x i16> undef to <2 x float> + ; CHECK: cost of 2 {{.*}} sitofp + %r175 = sitofp <2 x i16> undef to <2 x float> + ; CHECK: cost of 1 {{.*}} uitofp + %r176 = uitofp <2 x i32> undef to <2 x float> + ; CHECK: cost of 1 {{.*}} sitofp + %r177 = sitofp <2 x i32> undef to <2 x float> + ; CHECK: cost of 24 {{.*}} uitofp + %r178 = uitofp <2 x i64> undef to <2 x float> + ; CHECK: cost of 24 {{.*}} sitofp + %r179 = sitofp <2 x i64> undef to <2 x float> + + ; CHECK: cost of 8 {{.*}} uitofp + %r180 = uitofp <2 x i1> undef to <2 x double> + ; CHECK: cost of 8 {{.*}} sitofp + %r181 = sitofp <2 x i1> undef to <2 x double> + ; CHECK: cost of 4 {{.*}} uitofp + %r182 = uitofp <2 x i8> undef to <2 x double> + ; CHECK: cost of 4 {{.*}} sitofp + %r183 = sitofp <2 x i8> undef to <2 x double> + ; CHECK: cost of 3 {{.*}} uitofp + %r184 = uitofp <2 x i16> undef to <2 x double> + ; CHECK: cost of 3 {{.*}} sitofp + %r185 = sitofp <2 x i16> undef to <2 x double> + ; CHECK: cost of 2 {{.*}} uitofp + %r186 = uitofp <2 x i32> undef to <2 x double> + ; CHECK: cost of 2 {{.*}} sitofp + %r187 = sitofp <2 x i32> undef to <2 x double> + ; CHECK: cost of 24 {{.*}} uitofp + %r188 = uitofp <2 x i64> undef to <2 x double> + ; CHECK: cost of 24 {{.*}} sitofp + %r189 = sitofp <2 x i64> undef to <2 x double> + + ; CHECK: cost of 3 {{.*}} uitofp + %r190 = uitofp <4 x i1> undef to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %r191 = sitofp <4 x i1> undef to <4 x float> + ; CHECK: cost of 3 {{.*}} uitofp + %r192 = uitofp <4 x i8> undef to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %r193 = sitofp <4 x i8> undef to <4 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %r194 = uitofp <4 x i16> undef to <4 x float> + ; CHECK: cost of 2 {{.*}} sitofp + %r195 = sitofp <4 x i16> undef to <4 x float> + ; CHECK: cost of 1 {{.*}} uitofp + %r196 = uitofp <4 x i32> undef to <4 x float> + ; CHECK: cost of 1 {{.*}} sitofp + %r197 = sitofp <4 x i32> undef to <4 x float> + ; CHECK: cost of 48 {{.*}} uitofp + %r198 = uitofp <4 x i64> undef to <4 x float> + ; CHECK: cost of 48 {{.*}} sitofp + %r199 = sitofp <4 x i64> undef to <4 x float> + + ; CHECK: cost of 16 {{.*}} uitofp + %r200 = uitofp <4 x i1> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} sitofp + %r201 = sitofp <4 x i1> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} uitofp + %r202 = uitofp <4 x i8> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} sitofp + %r203 = sitofp <4 x i8> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} uitofp + %r204 = uitofp <4 x i16> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} sitofp + %r205 = sitofp <4 x i16> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} uitofp + %r206 = uitofp <4 x i32> undef to <4 x double> + ; CHECK: cost of 16 {{.*}} sitofp + %r207 = sitofp <4 x i32> undef to <4 x double> + ; CHECK: cost of 48 {{.*}} uitofp + %r208 = uitofp <4 x i64> undef to <4 x double> + ; CHECK: cost of 48 {{.*}} sitofp + %r209 = sitofp <4 x i64> undef to <4 x double> + + ; CHECK: cost of 32 {{.*}} uitofp + %r210 = uitofp <8 x i1> undef to <8 x float> + ; CHECK: cost of 32 {{.*}} sitofp + %r211 = sitofp <8 x i1> undef to <8 x float> + ; CHECK: cost of 32 {{.*}} uitofp + %r212 = uitofp <8 x i8> undef to <8 x float> + ; CHECK: cost of 32 {{.*}} sitofp + %r213 = sitofp <8 x i8> undef to <8 x float> + ; CHECK: cost of 4 {{.*}} uitofp + %r214 = uitofp <8 x i16> undef to <8 x float> + ; CHECK: cost of 4 {{.*}} sitofp + %r215 = sitofp <8 x i16> undef to <8 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %r216 = uitofp <8 x i32> undef to <8 x float> + ; CHECK: cost of 2 {{.*}} sitofp + %r217 = sitofp <8 x i32> undef to <8 x float> + ; CHECK: cost of 96 {{.*}} uitofp + %r218 = uitofp <8 x i64> undef to <8 x float> + ; CHECK: cost of 96 {{.*}} sitofp + %r219 = sitofp <8 x i64> undef to <8 x float> + + ; CHECK: cost of 32 {{.*}} uitofp + %r220 = uitofp <8 x i1> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} sitofp + %r221 = sitofp <8 x i1> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} uitofp + %r222 = uitofp <8 x i8> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} sitofp + %r223 = sitofp <8 x i8> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} uitofp + %r224 = uitofp <8 x i16> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} sitofp + %r225 = sitofp <8 x i16> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} uitofp + %r226 = uitofp <8 x i16> undef to <8 x double> + ; CHECK: cost of 32 {{.*}} sitofp + %r227 = sitofp <8 x i16> undef to <8 x double> + ; CHECK: cost of 96 {{.*}} uitofp + %r228 = uitofp <8 x i64> undef to <8 x double> + ; CHECK: cost of 96 {{.*}} sitofp + %r229 = sitofp <8 x i64> undef to <8 x double> + + ; CHECK: cost of 64 {{.*}} uitofp + %r230 = uitofp <16 x i1> undef to <16 x float> + ; CHECK: cost of 64 {{.*}} sitofp + %r231 = sitofp <16 x i1> undef to <16 x float> + ; CHECK: cost of 64 {{.*}} uitofp + %r232 = uitofp <16 x i8> undef to <16 x float> + ; CHECK: cost of 64 {{.*}} sitofp + %r233 = sitofp <16 x i8> undef to <16 x float> + ; CHECK: cost of 8 {{.*}} uitofp + %r234 = uitofp <16 x i16> undef to <16 x float> + ; CHECK: cost of 8 {{.*}} sitofp + %r235 = sitofp <16 x i16> undef to <16 x float> + ; CHECK: cost of 4 {{.*}} uitofp + %r236 = uitofp <16 x i32> undef to <16 x float> + ; CHECK: cost of 4 {{.*}} sitofp + %r237 = sitofp <16 x i32> undef to <16 x float> + ; CHECK: cost of 192 {{.*}} uitofp + %r238 = uitofp <16 x i64> undef to <16 x float> + ; CHECK: cost of 192 {{.*}} sitofp + %r239 = sitofp <16 x i64> undef to <16 x float> + + ; CHECK: cost of 64 {{.*}} uitofp + %r240 = uitofp <16 x i1> undef to <16 x double> + ; CHECK: cost of 64 {{.*}} sitofp + %r241 = sitofp <16 x i1> undef to <16 x double> + ; CHECK: cost of 64 {{.*}} uitofp + %r242 = uitofp <16 x i8> undef to <16 x double> + ; CHECK: cost of 64 {{.*}} sitofp + %r243 = sitofp <16 x i8> undef to <16 x double> + ; C4ECK: cost of 64 {{.*}} uitofp + %r244 = uitofp <16 x i16> undef to <16 x double> + ; CHECK: cost of 64 {{.*}} sitofp + %r245 = sitofp <16 x i16> undef to <16 x double> + ; CHECK: cost of 64 {{.*}} uitofp + %r246 = uitofp <16 x i16> undef to <16 x double> + ; CHECK: cost of 64 {{.*}} sitofp + %r247 = sitofp <16 x i16> undef to <16 x double> + ; CHECK: cost of 192 {{.*}} uitofp + %r248 = uitofp <16 x i64> undef to <16 x double> + ; CHECK: cost of 192 {{.*}} sitofp + %r249 = sitofp <16 x i64> undef to <16 x double> + ;CHECK: cost of 0 {{.*}} ret ret i32 undef } diff --git a/test/Analysis/CostModel/ARM/select.ll b/test/Analysis/CostModel/ARM/select.ll index 96afccfc8c..34ed1eefda 100644 --- a/test/Analysis/CostModel/ARM/select.ll +++ b/test/Analysis/CostModel/ARM/select.ll @@ -34,13 +34,26 @@ define void @casts() { %v12 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> undef ; CHECK: cost of 1 {{.*}} select %v13 = select <8 x i1> undef, <8 x i16> undef, <8 x i16> undef + ; CHECK: cost of 40 {{.*}} select + %v13b = select <16 x i1> undef, <16 x i16> undef, <16 x i16> undef ; CHECK: cost of 1 {{.*}} select %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef ; CHECK: cost of 1 {{.*}} select %v15 = select <4 x i1> undef, <4 x i32> undef, <4 x i32> undef + ; CHECK: cost of 41 {{.*}} select + %v15b = select <8 x i1> undef, <8 x i32> undef, <8 x i32> undef + ; CHECK: cost of 82 {{.*}} select + %v15c = select <16 x i1> undef, <16 x i32> undef, <16 x i32> undef + ; CHECK: cost of 1 {{.*}} select %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef + ; CHECK: cost of 19 {{.*}} select + %v16a = select <4 x i1> undef, <4 x i64> undef, <4 x i64> undef + ; CHECK: cost of 50 {{.*}} select + %v16b = select <8 x i1> undef, <8 x i64> undef, <8 x i64> undef + ; CHECK: cost of 100 {{.*}} select + %v16c = select <16 x i1> undef, <16 x i64> undef, <16 x i64> undef ; CHECK: cost of 1 {{.*}} select %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll index f0521bae48..5f9444157a 100644 --- a/test/Analysis/CostModel/X86/arith.ll +++ b/test/Analysis/CostModel/X86/arith.ll @@ -72,3 +72,57 @@ define i32 @fmul(i32 %arg) { %B = fmul <8 x float> undef, undef ret i32 undef } + +; AVX: shift +; AVX2: shift +define void @shift() { + ; AVX: cost of 2 {{.*}} shl + ; AVX2: cost of 1 {{.*}} shl + %A0 = shl <4 x i32> undef, undef + ; AVX: cost of 2 {{.*}} shl + ; AVX2: cost of 1 {{.*}} shl + %A1 = shl <2 x i64> undef, undef + + ; AVX: cost of 2 {{.*}} lshr + ; AVX2: cost of 1 {{.*}} lshr + %B0 = lshr <4 x i32> undef, undef + ; AVX: cost of 2 {{.*}} lshr + ; AVX2: cost of 1 {{.*}} lshr + %B1 = lshr <2 x i64> undef, undef + + ; AVX: cost of 2 {{.*}} ashr + ; AVX2: cost of 1 {{.*}} ashr + %C0 = ashr <4 x i32> undef, undef + ; AVX: cost of 6 {{.*}} ashr + ; AVX2: cost of 6 {{.*}} ashr + %C1 = ashr <2 x i64> undef, undef + + ret void +} + +; AVX: avx2shift +; AVX2: avx2shift +define void @avx2shift() { + ; AVX: cost of 2 {{.*}} shl + ; AVX2: cost of 1 {{.*}} shl + %A0 = shl <8 x i32> undef, undef + ; AVX: cost of 2 {{.*}} shl + ; AVX2: cost of 1 {{.*}} shl + %A1 = shl <4 x i64> undef, undef + + ; AVX: cost of 2 {{.*}} lshr + ; AVX2: cost of 1 {{.*}} lshr + %B0 = lshr <8 x i32> undef, undef + ; AVX: cost of 2 {{.*}} lshr + ; AVX2: cost of 1 {{.*}} lshr + %B1 = lshr <4 x i64> undef, undef + + ; AVX: cost of 2 {{.*}} ashr + ; AVX2: cost of 1 {{.*}} ashr + %C0 = ashr <8 x i32> undef, undef + ; AVX: cost of 12 {{.*}} ashr + ; AVX2: cost of 12 {{.*}} ashr + %C1 = ashr <4 x i64> undef, undef + + ret void +} diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll index bacc778691..b69b3bf630 100644 --- a/test/Analysis/CostModel/X86/cast.ll +++ b/test/Analysis/CostModel/X86/cast.ll @@ -44,9 +44,9 @@ define i32 @zext_sext(<8 x i1> %in) { %B = zext <8 x i16> undef to <8 x i32> ;CHECK: cost of 1 {{.*}} sext %C = sext <4 x i32> undef to <4 x i64> - ;CHECK: cost of 8 {{.*}} sext + ;CHECK: cost of 6 {{.*}} sext %C1 = sext <4 x i8> undef to <4 x i64> - ;CHECK: cost of 8 {{.*}} sext + ;CHECK: cost of 6 {{.*}} sext %C2 = sext <4 x i16> undef to <4 x i64> ;CHECK: cost of 1 {{.*}} zext @@ -77,3 +77,78 @@ define i32 @masks4(<4 x i1> %in) { ret i32 undef } +define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { + ; CHECK: cost of 3 {{.*}} sitofp + %A1 = sitofp <4 x i1> %a to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %A2 = sitofp <4 x i1> %a to <4 x double> + + ; CHECK: cost of 3 {{.*}} sitofp + %B1 = sitofp <4 x i8> %b to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %B2 = sitofp <4 x i8> %b to <4 x double> + + ; CHECK: cost of 3 {{.*}} sitofp + %C1 = sitofp <4 x i16> %c to <4 x float> + ; CHECK: cost of 3 {{.*}} sitofp + %C2 = sitofp <4 x i16> %c to <4 x double> + + ; CHECK: cost of 1 {{.*}} sitofp + %D1 = sitofp <4 x i32> %d to <4 x float> + ; CHECK: cost of 1 {{.*}} sitofp + %D2 = sitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { + ; CHECK: cost of 8 {{.*}} sitofp + %A1 = sitofp <8 x i1> %a to <8 x float> + + ; CHECK: cost of 8 {{.*}} sitofp + %B1 = sitofp <8 x i8> %b to <8 x float> + + ; CHECK: cost of 5 {{.*}} sitofp + %C1 = sitofp <8 x i16> %c to <8 x float> + + ; CHECK: cost of 1 {{.*}} sitofp + %D1 = sitofp <8 x i32> %d to <8 x float> + ret void +} + +define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { + ; CHECK: cost of 7 {{.*}} uitofp + %A1 = uitofp <4 x i1> %a to <4 x float> + ; CHECK: cost of 7 {{.*}} uitofp + %A2 = uitofp <4 x i1> %a to <4 x double> + + ; CHECK: cost of 2 {{.*}} uitofp + %B1 = uitofp <4 x i8> %b to <4 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %B2 = uitofp <4 x i8> %b to <4 x double> + + ; CHECK: cost of 2 {{.*}} uitofp + %C1 = uitofp <4 x i16> %c to <4 x float> + ; CHECK: cost of 2 {{.*}} uitofp + %C2 = uitofp <4 x i16> %c to <4 x double> + + ; CHECK: cost of 6 {{.*}} uitofp + %D1 = uitofp <4 x i32> %d to <4 x float> + ; CHECK: cost of 6 {{.*}} uitofp + %D2 = uitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { + ; CHECK: cost of 6 {{.*}} uitofp + %A1 = uitofp <8 x i1> %a to <8 x float> + + ; CHECK: cost of 5 {{.*}} uitofp + %B1 = uitofp <8 x i8> %b to <8 x float> + + ; CHECK: cost of 5 {{.*}} uitofp + %C1 = uitofp <8 x i16> %c to <8 x float> + + ; CHECK: cost of 9 {{.*}} uitofp + %D1 = uitofp <8 x i32> %d to <8 x float> + ret void +} diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg index d507d3fd71..444b7dc274 100644 --- a/test/Analysis/Profiling/lit.local.cfg +++ b/test/Analysis/Profiling/lit.local.cfg @@ -11,3 +11,6 @@ root = getRoot(config) # doesn't have any JIT at present so they will fail when run there. if root.host_arch in ['AArch64']: config.unsupported = True + +if 'hexagon' in root.target_triple: + config.unsupported = True diff --git a/test/Analysis/RegionInfo/Stats/lit.local.cfg b/test/Analysis/RegionInfo/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Analysis/RegionInfo/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Analysis/RegionInfo/Stats/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll index ac77ab36e6..d7ef79cf6b 100644 --- a/test/Analysis/RegionInfo/Stats/block_sort.ll +++ b/test/Analysis/RegionInfo/block_sort.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats -analyze < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll index 1145ffdba0..0da4e5dca3 100644 --- a/test/Analysis/RegionInfo/Stats/cond_loop.ll +++ b/test/Analysis/RegionInfo/cond_loop.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll index 6b398800db..53f13c10ea 100644 --- a/test/Analysis/RegionInfo/Stats/condition_complicated.ll +++ b/test/Analysis/RegionInfo/condition_complicated.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll index f551108d60..fd04afc20d 100644 --- a/test/Analysis/RegionInfo/Stats/condition_complicated_2.ll +++ b/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll index 5e4d9d2f8b..88c45c2e6e 100644 --- a/test/Analysis/RegionInfo/Stats/condition_forward_edge.ll +++ b/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll index e48413a4c2..bfb0df84b4 100644 --- a/test/Analysis/RegionInfo/Stats/condition_same_exit.ll +++ b/test/Analysis/RegionInfo/condition_same_exit.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll index 00d9ed24e1..3f93a6ecd3 100644 --- a/test/Analysis/RegionInfo/Stats/condition_simple.ll +++ b/test/Analysis/RegionInfo/condition_simple.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll index b84abecc16..ac409ec1bb 100644 --- a/test/Analysis/RegionInfo/Stats/exit_in_condition.ll +++ b/test/Analysis/RegionInfo/exit_in_condition.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll index 8e588286a5..61abef8ff7 100644 --- a/test/Analysis/RegionInfo/Stats/infinite_loop.ll +++ b/test/Analysis/RegionInfo/infinite_loop.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s diff --git a/test/Analysis/RegionInfo/Stats/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll index a8227e340c..56e83cfdeb 100644 --- a/test/Analysis/RegionInfo/Stats/infinite_loop_2.ll +++ b/test/Analysis/RegionInfo/infinite_loop_2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll index b09c9c1e59..4538f0f785 100644 --- a/test/Analysis/RegionInfo/Stats/infinite_loop_3.ll +++ b/test/Analysis/RegionInfo/infinite_loop_3.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s diff --git a/test/Analysis/RegionInfo/Stats/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll index 681c305ce9..4ac9068f0d 100644 --- a/test/Analysis/RegionInfo/Stats/infinite_loop_4.ll +++ b/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll index 08d2ba8e35..4c1c8654ca 100644 --- a/test/Analysis/RegionInfo/Stats/loop_with_condition.ll +++ b/test/Analysis/RegionInfo/loop_with_condition.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s diff --git a/test/Analysis/RegionInfo/Stats/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll index 6449949df8..9efe619ad9 100644 --- a/test/Analysis/RegionInfo/Stats/loops_1.ll +++ b/test/Analysis/RegionInfo/loops_1.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll index dc4a1adffb..ca7eca75af 100644 --- a/test/Analysis/RegionInfo/Stats/loops_2.ll +++ b/test/Analysis/RegionInfo/loops_2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll index 1474e033e5..55001c7f43 100644 --- a/test/Analysis/RegionInfo/Stats/mix_1.ll +++ b/test/Analysis/RegionInfo/mix_1.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s diff --git a/test/Analysis/RegionInfo/Stats/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll index a3707a1987..3e73b3a328 100644 --- a/test/Analysis/RegionInfo/Stats/nested_loops.ll +++ b/test/Analysis/RegionInfo/nested_loops.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s diff --git a/test/Analysis/RegionInfo/Stats/next.ll b/test/Analysis/RegionInfo/next.ll index 890b4f2300..b22bbcc2b6 100644 --- a/test/Analysis/RegionInfo/Stats/next.ll +++ b/test/Analysis/RegionInfo/next.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/paper.ll b/test/Analysis/RegionInfo/paper.ll index 96c87e0559..0398d2baa2 100644 --- a/test/Analysis/RegionInfo/Stats/paper.ll +++ b/test/Analysis/RegionInfo/paper.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/RegionInfo/Stats/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll index e75661e890..25713420a3 100644 --- a/test/Analysis/RegionInfo/Stats/two_loops_same_header.ll +++ b/test/Analysis/RegionInfo/two_loops_same_header.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -regions -analyze < %s | FileCheck %s ; RUN: opt -regions -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s ; RUN: opt -regions -print-region-style=bb -analyze < %s 2>&1 | FileCheck -check-prefix=BBIT %s diff --git a/test/Analysis/ScalarEvolution/scev-invalid.ll b/test/Analysis/ScalarEvolution/scev-invalid.ll new file mode 100644 index 0000000000..aac0d319ae --- /dev/null +++ b/test/Analysis/ScalarEvolution/scev-invalid.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -indvars -loop-unroll | FileCheck %s +; +; PR15570: SEGV: SCEV back-edge info invalid after dead code removal. +; +; Indvars creates a SCEV expression for the loop's back edge taken +; count, then determines that the comparison is always true and +; removes it. +; +; When loop-unroll asks for the expression, it contains a NULL +; SCEVUnknkown (as a CallbackVH). +; +; forgetMemoizedResults should invalidate the backedge taken count expression. + +; CHECK: @test +; CHECK-NOT: phi +; CHECK-NOT: icmp +; CHECK: ret void +define void @test() { +entry: + %xor1 = xor i32 0, 1 + br label %b17 + +b17: + br i1 undef, label %b22, label %b18 + +b18: + %phi1 = phi i32 [ %add1, %b18 ], [ %xor1, %b17 ] + %add1 = add nsw i32 %phi1, -1 + %cmp1 = icmp sgt i32 %add1, 0 + br i1 %cmp1, label %b18, label %b22 + +b22: + ret void +} diff --git a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll new file mode 100644 index 0000000000..f1edb4482c --- /dev/null +++ b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll @@ -0,0 +1,104 @@ +; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s + +; Generated with "clang -cc1 -disable-llvm-optzns -O1 -emit-llvm" +; #include <new> +; struct Foo { long i; }; +; struct Bar { void *p; }; +; long foo(int n) { +; Foo *f = new Foo; +; f->i = 1; +; for (int i=0; i<n; ++i) { +; Bar *b = new (f) Bar; +; b->p = 0; +; f = new (f) Foo; +; f->i = i; +; } +; return f->i; +; } + +; Basic AA says MayAlias, TBAA says NoAlias +; CHECK: MayAlias: i64* %i5, i8** %p +; CHECK: NoAlias: store i64 %conv, i64* %i5, align 8, !tbaa !4 <-> store i8* null, i8** %p, align 8, !tbaa !3 + +%struct.Foo = type { i64 } +%struct.Bar = type { i8* } + +define i64 @_Z3fooi(i32 %n) #0 { +entry: + %n.addr = alloca i32, align 4 + %f = alloca %struct.Foo*, align 8 + %i1 = alloca i32, align 4 + %b = alloca %struct.Bar*, align 8 + store i32 %n, i32* %n.addr, align 4, !tbaa !0 + %call = call noalias i8* @_Znwm(i64 8) + %0 = bitcast i8* %call to %struct.Foo* + store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !3 + %1 = load %struct.Foo** %f, align 8, !tbaa !3 + %i = getelementptr inbounds %struct.Foo* %1, i32 0, i32 0 + store i64 1, i64* %i, align 8, !tbaa !4 + store i32 0, i32* %i1, align 4, !tbaa !0 + br label %for.cond + +for.cond: + %2 = load i32* %i1, align 4, !tbaa !0 + %3 = load i32* %n.addr, align 4, !tbaa !0 + %cmp = icmp slt i32 %2, %3 + br i1 %cmp, label %for.body, label %for.end + +for.body: + %4 = load %struct.Foo** %f, align 8, !tbaa !3 + %5 = bitcast %struct.Foo* %4 to i8* + %new.isnull = icmp eq i8* %5, null + br i1 %new.isnull, label %new.cont, label %new.notnull + +new.notnull: + %6 = bitcast i8* %5 to %struct.Bar* + br label %new.cont + +new.cont: + %7 = phi %struct.Bar* [ %6, %new.notnull ], [ null, %for.body ] + store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !3 + %8 = load %struct.Bar** %b, align 8, !tbaa !3 + %p = getelementptr inbounds %struct.Bar* %8, i32 0, i32 0 + store i8* null, i8** %p, align 8, !tbaa !3 + %9 = load %struct.Foo** %f, align 8, !tbaa !3 + %10 = bitcast %struct.Foo* %9 to i8* + %new.isnull2 = icmp eq i8* %10, null + br i1 %new.isnull2, label %new.cont4, label %new.notnull3 + +new.notnull3: + %11 = bitcast i8* %10 to %struct.Foo* + br label %new.cont4 + +new.cont4: + %12 = phi %struct.Foo* [ %11, %new.notnull3 ], [ null, %new.cont ] + store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !3 + %13 = load i32* %i1, align 4, !tbaa !0 + %conv = sext i32 %13 to i64 + %14 = load %struct.Foo** %f, align 8, !tbaa !3 + %i5 = getelementptr inbounds %struct.Foo* %14, i32 0, i32 0 + store i64 %conv, i64* %i5, align 8, !tbaa !4 + br label %for.inc + +for.inc: + %15 = load i32* %i1, align 4, !tbaa !0 + %inc = add nsw i32 %15, 1 + store i32 %inc, i32* %i1, align 4, !tbaa !0 + br label %for.cond + +for.end: + %16 = load %struct.Foo** %f, align 8, !tbaa !3 + %i6 = getelementptr inbounds %struct.Foo* %16, i32 0, i32 0 + %17 = load i64* %i6, align 8, !tbaa !4 + ret i64 %17 +} + +declare noalias i8* @_Znwm(i64) + +attributes #0 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"any pointer", metadata !1} +!4 = metadata !{metadata !"long", metadata !1} diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll index b2256b10a8..df70149a33 100644 --- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll +++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll @@ -22,4 +22,11 @@ define i32 @main() nounwind readonly { declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone -!0 = metadata !{i32 459008, metadata !0, metadata !0, metadata !0, i32 38, metadata !0} ; [ DW_TAG_auto_variable ] +!7 = metadata !{metadata !1} +!6 = metadata !{i32 786449, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !7, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786688, metadata !1, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] +!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786468, metadata !6, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3da7c181fa..8faec8be47 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,6 +25,7 @@ set(LLVM_TEST_DEPENDS UnitTests llvm-objdump llvm-readobj llvm-rtdyld + llvm-symbolizer macho-dump opt profile_rt-shared FileCheck count not diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index 64217695d8..c62edf6503 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -71,8 +71,8 @@ define void @test_variadic_alloca(i64 %n, ...) { ; CHECK: sub sp, sp, #208 ; CHECK: stp x29, x30, [sp, #192] ; CHECK: add x29, sp, #192 -; CHECK: sub x9, x29, #192 -; CHECK: add x8, x9, #0 +; CHECK: sub [[TMP:x[0-9]+]], x29, #192 +; CHECK: add x8, [[TMP]], #0 ; CHECK: str q7, [x8, #112] ; [...] ; CHECK: str q1, [x8, #16] @@ -131,4 +131,4 @@ define void @test_scoped_alloca(i64 %n) { ; CHECK: mov sp, [[SAVED_SP]] ret void -}
\ No newline at end of file +} diff --git a/test/CodeGen/ARM/Stats/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index a63cdd46e2..4783f37076 100644 --- a/test/CodeGen/ARM/Stats/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 29ccf22eee..91a9903f38 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -76,27 +76,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] -!2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] !8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{null, metadata !12, metadata !13} !12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] !13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] !15 = metadata !{null, metadata !12} -!16 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] !18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] !19 = metadata !{metadata !13, metadata !13, metadata !1} -!20 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] !21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] !22 = metadata !{metadata !13} !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -104,7 +104,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] !27 = metadata !{i32 17, i32 0, metadata !28, null} -!28 = metadata !{i32 786443, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 18, i32 0, metadata !28, null} !30 = metadata !{i32 20, i32 0, metadata !28, null} !31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -112,11 +112,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] !34 = metadata !{i32 11, i32 0, metadata !16, null} !35 = metadata !{i32 11, i32 0, metadata !36, null} -!36 = metadata !{i32 786443, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] -!37 = metadata !{i32 786443, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, i32 0} ; [ DW_TAG_auto_variable ] -!39 = metadata !{i32 786443, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] -!40 = metadata !{i32 786443, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ] !41 = metadata !{i32 24, i32 0, metadata !39, null} !42 = metadata !{i32 25, i32 0, metadata !39, null} !43 = metadata !{i32 26, i32 0, metadata !39, null} @@ -124,3 +124,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !45 = metadata !{i32 27, i32 0, metadata !39, null} !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} !47 = metadata !{i32 0} +!48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"} diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index c8ce6eec62..1d1b89a34f 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -76,24 +76,17 @@ entry: } !llvm.dbg.cu = !{!2} -!40 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !9} -!41 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17} -!42 = metadata !{metadata !10, metadata !11} -!43 = metadata !{metadata !18, metadata !19} -!44 = metadata !{metadata !21, metadata !22} -!45 = metadata !{metadata !24, metadata !25} -!46 = metadata !{metadata !27, metadata !28} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !5} -!5 = metadata !{i32 786468, metadata !1, metadata !"_Bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786468, metadata !1, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", metadata !1, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", metadata !1, i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", metadata !1, i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", metadata !1, i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !12 = metadata !{i32 786443, metadata !0, i32 4, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] @@ -124,3 +117,11 @@ entry: !37 = metadata !{i32 13, i32 0, metadata !26, null} !38 = metadata !{i32 16, i32 0, metadata !9, null} !39 = metadata !{i32 16, i32 0, metadata !29, null} +!40 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !9} +!41 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17} +!42 = metadata !{metadata !10, metadata !11} +!43 = metadata !{metadata !18, metadata !19} +!44 = metadata !{metadata !21, metadata !22} +!45 = metadata !{metadata !24, metadata !25} +!46 = metadata !{metadata !27, metadata !28} +!47 = metadata !{metadata !"foo.c", metadata !"/tmp/"} diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index 2cd4acea63..266609b8ce 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -73,24 +73,17 @@ define i32 @get5(i32 %a) nounwind optsize ssp { declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!40 = metadata !{metadata !1, metadata !6, metadata !7, metadata !8, metadata !9} -!42 = metadata !{metadata !10, metadata !11} -!43 = metadata !{metadata !13, metadata !14} -!44 = metadata !{metadata !16, metadata !17} -!45 = metadata !{metadata !19, metadata !20} -!46 = metadata !{metadata !27, metadata !28} -!41 = metadata !{metadata !22, metadata !23, metadata !24, metadata !25, metadata !26} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"ss3.c", metadata !"/private/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"ss3.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !40, metadata !41, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"get1", metadata !"get1", metadata !"", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !2, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] -!9 = metadata !{i32 786478, i32 0, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !2, metadata !"get2", metadata !"get2", metadata !"", metadata !2, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !2, metadata !"get3", metadata !"get3", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !2, metadata !"get4", metadata !"get4", metadata !"", metadata !2, i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !2, metadata !"get5", metadata !"get5", metadata !"", metadata !2, i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !12 = metadata !{i32 786443, metadata !1, i32 5, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] @@ -121,3 +114,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !37 = metadata !{i32 14, i32 32, metadata !21, null} !38 = metadata !{i32 17, i32 16, metadata !9, null} !39 = metadata !{i32 17, i32 32, metadata !29, null} +!40 = metadata !{metadata !1, metadata !6, metadata !7, metadata !8, metadata !9} +!41 = metadata !{metadata !22, metadata !23, metadata !24, metadata !25, metadata !26} +!42 = metadata !{metadata !10, metadata !11} +!43 = metadata !{metadata !13, metadata !14} +!44 = metadata !{metadata !16, metadata !17} +!45 = metadata !{metadata !19, metadata !20} +!46 = metadata !{metadata !27, metadata !28} +!47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/Stats/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll index b21bb006e3..1b21f7571d 100644 --- a/test/CodeGen/ARM/Stats/2011-12-14-machine-sink.ll +++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; Radar 10266272 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" diff --git a/test/CodeGen/ARM/DbgValueOtherTargets.test b/test/CodeGen/ARM/DbgValueOtherTargets.test index 8739a43640..bf90891de0 100644 --- a/test/CodeGen/ARM/DbgValueOtherTargets.test +++ b/test/CodeGen/ARM/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=arm -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/ARM/Stats/lit.local.cfg b/test/CodeGen/ARM/Stats/lit.local.cfg deleted file mode 100644 index f6194d2421..0000000000 --- a/test/CodeGen/ARM/Stats/lit.local.cfg +++ /dev/null @@ -1,8 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -targets = set(config.root.targets_to_build.split()) -if not 'ARM' in targets: - config.unsupported = True - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/CodeGen/ARM/a15-SD-dep.ll b/test/CodeGen/ARM/a15-SD-dep.ll new file mode 100644 index 0000000000..a52468e5be --- /dev/null +++ b/test/CodeGen/ARM/a15-SD-dep.ll @@ -0,0 +1,58 @@ +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -disable-a15-sd-optimization -verify-machineinstrs < %s | FileCheck -check-prefix=DISABLED %s +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck -check-prefix=ENABLED %s + +; CHECK-ENABLED: t1: +; CHECK-DISABLED: t1: +define <2 x float> @t1(float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0] + %i1 = insertelement <2 x float> undef, float %f, i32 1 + %i2 = fadd <2 x float> %i1, %i1 + ret <2 x float> %i2 +} + +; CHECK-ENABLED: t2: +; CHECK-DISABLED: t2: +define <4 x float> @t2(float %g, float %f) { + ; CHECK-ENABLED: vdup.32 q{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0] + %i1 = insertelement <4 x float> undef, float %f, i32 1 + %i2 = fadd <4 x float> %i1, %i1 + ret <4 x float> %i2 +} + +; CHECK-ENABLED: t3: +; CHECK-DISABLED: t3: +define arm_aapcs_vfpcc <2 x float> @t3(float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup.32 d{{[0-9]*}}, d0[0] + %i1 = insertelement <2 x float> undef, float %f, i32 1 + %i2 = fadd <2 x float> %i1, %i1 + ret <2 x float> %i2 +} + +; CHECK-ENABLED: t4: +; CHECK-DISABLED: t4: +define <2 x float> @t4(float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d0[0] + ; CHECK-DISABLED-NOT: vdup + %i1 = insertelement <2 x float> undef, float %f, i32 1 + br label %b + + ; Block %b has an S-reg as live-in. +b: + %i2 = fadd <2 x float> %i1, %i1 + ret <2 x float> %i2 +} + +; CHECK-ENABLED: t5: +; CHECK-DISABLED: t5: +define arm_aapcs_vfpcc <4 x float> @t5(<4 x float> %q, float %f) { + ; CHECK-ENABLED: vdup.32 d{{[0-9]*}}, d{{[0-9]*}}[0] + ; CHECK-ENABLED: vadd.f32 + ; CHECK-ENABLED-NEXT: bx lr + ; CHECK-DISABLED-NOT: vdup + %i1 = insertelement <4 x float> %q, float %f, i32 1 + %i2 = fadd <4 x float> %i1, %i1 + ret <4 x float> %i2 +} diff --git a/test/CodeGen/ARM/a15-partial-update.ll b/test/CodeGen/ARM/a15-partial-update.ll new file mode 100644 index 0000000000..6306790d15 --- /dev/null +++ b/test/CodeGen/ARM/a15-partial-update.ll @@ -0,0 +1,38 @@ +; RUN: llc -O1 -mcpu=cortex-a15 -mtriple=armv7-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s + +; CHECK: t1: +define <2 x float> @t1(float* %A, <2 x float> %B) { +; The generated code for this test uses a vld1.32 instruction +; to write the lane 1 of a D register containing the value of +; <2 x float> %B. Since the D register is defined, it would +; be incorrect to fully write it (with a vmov.f64) before the +; vld1.32 instruction. The test checks that a vmov.f64 was not +; generated. + +; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}}, + %tmp2 = load float* %A, align 4 + %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1 + ret <2 x float> %tmp3 +} + +; CHECK: t2: +define void @t2(<4 x i8> *%in, <4 x i8> *%out, i32 %n) { +entry: + br label %loop +loop: +; The code generated by this test uses a vld1.32 instruction. +; We check that a dependency breaking vmov* instruction was +; generated. + +; CHECK: vmov.{{.*}} d{{[0-9]+}}, + %oldcount = phi i32 [0, %entry], [%newcount, %loop] + %newcount = add i32 %oldcount, 1 + %p1 = getelementptr <4 x i8> *%in, i32 %newcount + %p2 = getelementptr <4 x i8> *%out, i32 %newcount + %tmp1 = load <4 x i8> *%p1, align 4 + store <4 x i8> %tmp1, <4 x i8> *%p2 + %cmp = icmp eq i32 %newcount, %n + br i1 %cmp, label %loop, label %ret +ret: + ret void +} diff --git a/test/CodeGen/ARM/Stats/addrmode.ll b/test/CodeGen/ARM/addrmode.ll index 6da90897b9..748d258044 100644 --- a/test/CodeGen/ARM/Stats/addrmode.ll +++ b/test/CodeGen/ARM/addrmode.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=arm -stats 2>&1 | grep asm-printer | grep 4 define i32 @t1(i32 %a) { diff --git a/test/CodeGen/ARM/call_nolink.ll b/test/CodeGen/ARM/call_nolink.ll index 00b16888f3..5ec7f74a60 100644 --- a/test/CodeGen/ARM/call_nolink.ll +++ b/test/CodeGen/ARM/call_nolink.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \ -; RUN: not grep "bx lr" +; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s %struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* } @r = external global [14 x i32] ; <[14 x i32]*> [#uses=4] @@ -8,6 +7,8 @@ @numi = external global i32 ; <i32*> [#uses=1] @counter = external global [2 x i32] ; <[2 x i32]*> [#uses=1] +; CHECK: main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i: +; CHECK-NOT: bx lr define void @main_bb_2E_i_bb205_2E_i_2E_i_bb115_2E_i_2E_i() { newFuncRoot: @@ -50,3 +51,12 @@ bb115.i.i: ; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot icmp slt i32 %tmp166.i.i, %tmp168.i.i ; <i1>:0 [#uses=1] br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub } + +define void @PR15520(void ()* %fn) { + call void %fn() + ret void + +; CHECK: PR15520: +; CHECK: mov lr, pc +; CHECK: mov pc, r0 +} diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll index 7316452cd6..769ba55eb9 100644 --- a/test/CodeGen/ARM/commute-movcc.ll +++ b/test/CodeGen/ARM/commute-movcc.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s -; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s ; LLVM IR optimizers canonicalize icmp+select this way. ; Make sure that TwoAddressInstructionPass can commute the corresponding diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll index 1694d6412f..33c8e9daae 100644 --- a/test/CodeGen/ARM/debug-info-arg.ll +++ b/test/CodeGen/ARM/debug-info-arg.ll @@ -30,26 +30,24 @@ declare void @foobar(i64, i64) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!30 = metadata !{metadata !1} -!31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772", metadata !0} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !30, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} !5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!6 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] -!7 = metadata !{i32 786451, metadata !0, metadata !"tag_s", metadata !2, i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9, metadata !11, metadata !12} -!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !2, i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!11 = metadata !{i32 786445, metadata !7, metadata !"y", metadata !2, i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ] -!12 = metadata !{i32 786445, metadata !7, metadata !"z", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] +!9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"y", i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"z", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] !13 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ] -!15 = metadata !{i32 786454, metadata !0, metadata !"UInt64", metadata !2, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] -!16 = metadata !{i32 786468, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!15 = metadata !{i32 786454, metadata !32, metadata !0, metadata !"UInt64", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] +!16 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !17 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ] !18 = metadata !{i32 786689, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786689, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -63,3 +61,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !27 = metadata !{i32 786443, metadata !1, i32 11, i32 107, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] !28 = metadata !{i32 13, i32 5, metadata !27, null} !29 = metadata !{i32 14, i32 1, metadata !27, null} +!30 = metadata !{metadata !1} +!31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19} +!32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"} diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll index 2cb24bd11e..d0bfecc5af 100644 --- a/test/CodeGen/ARM/debug-info-blocks.ll +++ b/test/CodeGen/ARM/debug-info-blocks.ll @@ -93,138 +93,136 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load } !llvm.dbg.cu = !{!0} -!147 = metadata !{metadata !1, metadata !1, metadata !5, metadata !5, metadata !9, metadata !14, metadata !19, metadata !19, metadata !14, metadata !14, metadata !14, metadata !19, metadata !19, metadata !19} -!148 = metadata !{metadata !23} -!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !"Apple clang version 2.1", i1 true, i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786433, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!2 = metadata !{i32 786473, metadata !"header.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!0 = metadata !{i32 786449, i32 16, metadata !40, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, null, metadata !148, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ] !3 = metadata !{metadata !4} !4 = metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ] -!5 = metadata !{i32 786433, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!6 = metadata !{i32 786473, metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786433, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ] !7 = metadata !{metadata !8} !8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ] -!9 = metadata !{i32 786433, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!10 = metadata !{i32 786473, metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!9 = metadata !{i32 786433, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ] !11 = metadata !{metadata !12, metadata !13} !12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ] !13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ] -!14 = metadata !{i32 786433, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!15 = metadata !{i32 786473, metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!14 = metadata !{i32 786433, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ] !16 = metadata !{metadata !17, metadata !18} !17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ] !18 = metadata !{i32 786472, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ] -!19 = metadata !{i32 786433, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!20 = metadata !{i32 786473, metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!19 = metadata !{i32 786433, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ] !21 = metadata !{metadata !22} !22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ] -!23 = metadata !{i32 786478, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] -!24 = metadata !{i32 786473, metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] -!25 = metadata !{i32 786453, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!23 = metadata !{i32 786478, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ] +!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !26 = metadata !{null} !27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64, null} ; [ DW_TAG_arg_variable ] -!28 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] -!29 = metadata !{i32 786451, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] +!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ] !30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124} -!31 = metadata !{i32 786445, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] -!32 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!33 = metadata !{i32 786445, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ] -!34 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!35 = metadata !{i32 786445, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] -!36 = metadata !{i32 786445, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ] -!37 = metadata !{i32 786445, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ] -!38 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] -!39 = metadata !{i32 786451, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!40 = metadata !{i32 786473, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] +!32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!33 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ] +!34 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!35 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__reserved", i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] +!36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ] +!37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] +!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ] !41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47} -!42 = metadata !{i32 786445, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ] -!43 = metadata !{i32 786468, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!44 = metadata !{i32 786445, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ] -!45 = metadata !{i32 786445, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ] -!46 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] -!47 = metadata !{i32 786445, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ] -!48 = metadata !{i32 786445, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ] -!49 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] -!50 = metadata !{i32 786451, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ] +!43 = metadata !{i32 786468, null, metadata !0, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!44 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"Size", i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ] +!45 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"CopyFuncPtr", i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ] +!46 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ] +!48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ] +!49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] +!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ] !51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58} -!52 = metadata !{i32 786445, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] -!53 = metadata !{i32 786445, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ] -!54 = metadata !{i32 786445, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] -!55 = metadata !{i32 786445, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ] -!56 = metadata !{i32 786445, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ] -!57 = metadata !{i32 786445, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ] -!58 = metadata !{i32 786445, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ] -!59 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ] -!60 = metadata !{i32 786451, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!61 = metadata !{i32 786473, metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] +!53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ] +!54 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ] +!55 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__size", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ] +!56 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__copy_helper", i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ] +!57 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ] +!58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ] +!59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ] +!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ] !62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79} !63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!64 = metadata !{i32 786451, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!65 = metadata !{i32 786473, metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ] !66 = metadata !{metadata !67} -!67 = metadata !{i32 786445, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !68 = metadata !{i32 786454, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ] -!69 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ] -!70 = metadata !{i32 786451, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!71 = metadata !{i32 786445, metadata !61, metadata !"_mydataRef", metadata !61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!69 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ] +!70 = metadata !{i32 786451, metadata !40, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !72 = metadata !{i32 786454, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ] -!73 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ] -!74 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] -!75 = metadata !{i32 786445, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ] +!74 = metadata !{i32 786470, null, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] +!75 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_scale", i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !76 = metadata !{i32 786454, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ] -!77 = metadata !{i32 786473, metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] -!78 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!79 = metadata !{i32 786445, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!80 = metadata !{i32 786451, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ] +!78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ] !81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88} -!82 = metadata !{i32 786445, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ] -!83 = metadata !{i32 786468, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!84 = metadata !{i32 786445, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ] -!85 = metadata !{i32 786445, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ] -!86 = metadata !{i32 786445, metadata !61, metadata !"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ] -!87 = metadata !{i32 786445, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ] -!88 = metadata !{i32 786445, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ] -!89 = metadata !{i32 786445, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ] -!90 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] -!91 = metadata !{i32 786451, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ] +!83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!84 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"mydataO", i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ] +!85 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"cached", i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ] +!86 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasBeenCached", i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ] +!87 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasPattern", i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ] +!88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ] +!89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ] +!90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] +!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ] !92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123} !93 = metadata !{i32 786460, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ] -!94 = metadata !{i32 786451, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!95 = metadata !{i32 786473, metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!95 = metadata !{i32 786473, metadata !157} ; [ DW_TAG_file_type ] !96 = metadata !{metadata !97} !97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!98 = metadata !{i32 786445, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!98 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_itemID", i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !99 = metadata !{i32 786454, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ] -!100 = metadata !{i32 786468, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!101 = metadata !{i32 786445, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!102 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ] -!103 = metadata !{i32 786451, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ] -!104 = metadata !{i32 786473, metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ] +!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ] +!104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ] !105 = metadata !{metadata !106} !106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ] -!107 = metadata !{i32 786445, metadata !24, metadata !"_bounds", metadata !24, i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] !108 = metadata !{i32 786454, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ] -!109 = metadata !{i32 786451, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ] !110 = metadata !{metadata !111, metadata !117} -!111 = metadata !{i32 786445, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ] +!111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ] !112 = metadata !{i32 786454, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ] -!113 = metadata !{i32 786451, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ] !114 = metadata !{metadata !115, metadata !116} -!115 = metadata !{i32 786445, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] -!116 = metadata !{i32 786445, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] -!117 = metadata !{i32 786445, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ] +!115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] +!116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] +!117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ] !118 = metadata !{i32 786454, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ] -!119 = metadata !{i32 786451, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ] !120 = metadata !{metadata !121, metadata !122} -!121 = metadata !{i32 786445, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] -!122 = metadata !{i32 786445, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] -!123 = metadata !{i32 786445, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] -!124 = metadata !{i32 786445, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ] +!121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ] +!122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ] +!123 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_data", i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ] +!124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ] !125 = metadata !{i32 786454, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ] -!126 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ] -!127 = metadata !{i32 786451, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!128 = metadata !{i32 786473, metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ] +!126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ] +!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ] !129 = metadata !{i32 609, i32 144, metadata !23, null} !130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ] !131 = metadata !{i32 609, i32 155, metadata !23, null} @@ -243,3 +241,18 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load !144 = metadata !{i32 612, i32 17, metadata !142, null} !145 = metadata !{i32 613, i32 17, metadata !142, null} !146 = metadata !{i32 615, i32 13, metadata !142, null} +!147 = metadata !{metadata !1, metadata !1, metadata !5, metadata !5, metadata !9, metadata !14, metadata !19, metadata !19, metadata !14, metadata !14, metadata !14, metadata !19, metadata !19, metadata !19} +!148 = metadata !{metadata !23} +!149 = metadata !{metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm"} +!150 = metadata !{metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm"} +!151 = metadata !{metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm"} +!152 = metadata !{metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm"} +!153 = metadata !{metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm"} +!154 = metadata !{metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm"} +!155 = metadata !{metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm"} +!156 = metadata !{metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm"} +!157 = metadata !{metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm"} +!158 = metadata !{metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm"} +!159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"} +!160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"} +!161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"} diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll index f241c069c3..95e6cf2554 100644 --- a/test/CodeGen/ARM/debug-info-branch-folding.ll +++ b/test/CodeGen/ARM/debug-info-branch-folding.ll @@ -38,58 +38,59 @@ declare i32 @printf(i8* nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!50 = metadata !{metadata !0, metadata !10, metadata !14} -!51 = metadata !{metadata !18} -!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29} -!53 = metadata !{metadata !30} - !0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 786454, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] !6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] -!7 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] !10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !13} -!13 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 786473, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!16 = metadata !{i32 786453, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !17 = metadata !{null} !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] !20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ] -!21 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] -!22 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] -!23 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] -!25 = metadata !{i32 786443, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] +!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ] !26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] !27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ] -!31 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] -!32 = metadata !{i32 786454, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] -!33 = metadata !{i32 786455, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] !34 = metadata !{metadata !35, metadata !37} -!35 = metadata !{i32 786445, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] -!36 = metadata !{i32 786454, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!37 = metadata !{i32 786445, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] -!38 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] !39 = metadata !{i32 79, i32 7, metadata !40, null} -!40 = metadata !{i32 786443, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] -!41 = metadata !{i32 786443, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] -!42 = metadata !{i32 786443, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] -!43 = metadata !{i32 786443, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ] !44 = metadata !{i32 75, i32 5, metadata !42, null} !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} -!46 = metadata !{i32 786443, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] -!47 = metadata !{i32 786443, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ] !48 = metadata !{i32 95, i32 3, metadata !25, null} !49 = metadata !{i32 99, i32 3, metadata !25, null} +!50 = metadata !{metadata !0, metadata !10, metadata !14} +!51 = metadata !{metadata !18} +!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29} +!53 = metadata !{metadata !30} +!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} +!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll index 80096906e5..e3e4d06893 100644 --- a/test/CodeGen/ARM/debug-info-d16-reg.ll +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -57,22 +57,18 @@ entry: declare i32 @puts(i8* nocapture) nounwind !llvm.dbg.cu = !{!2} -!42 = metadata !{metadata !0, metadata !9, metadata !10} -!43 = metadata !{metadata !16, metadata !17, metadata !18} -!44 = metadata !{metadata !19, metadata !20, metadata !21} -!45 = metadata !{metadata !22, metadata !23, metadata !24} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"/tmp/a.c", metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !42, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, null, null, metadata !42, null, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !7 = metadata !{i32 786468, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{i32 786468, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ] !11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !5, metadata !5, metadata !13} !13 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] @@ -87,14 +83,14 @@ declare i32 @puts(i8* nocapture) nounwind !22 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !23 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] !24 = metadata !{i32 786688, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] -!25 = metadata !{i32 786443, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ] !26 = metadata !{i32 4, i32 0, metadata !9, null} !27 = metadata !{i32 6, i32 0, metadata !28, null} -!28 = metadata !{i32 786443, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !1, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 7, i32 0, metadata !28, null} !30 = metadata !{i32 11, i32 0, metadata !0, null} !31 = metadata !{i32 13, i32 0, metadata !32, null} -!32 = metadata !{i32 786443, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!32 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !33 = metadata !{i32 14, i32 0, metadata !32, null} !34 = metadata !{i32 17, i32 0, metadata !10, null} !35 = metadata !{i32 19, i32 0, metadata !25, null} @@ -104,3 +100,8 @@ declare i32 @puts(i8* nocapture) nounwind !39 = metadata !{i32 6, i32 0, metadata !28, metadata !37} !40 = metadata !{i32 22, i32 0, metadata !25, null} !41 = metadata !{i32 23, i32 0, metadata !25, null} +!42 = metadata !{metadata !0, metadata !9, metadata !10} +!43 = metadata !{metadata !16, metadata !17, metadata !18} +!44 = metadata !{metadata !19, metadata !20, metadata !21} +!45 = metadata !{metadata !22, metadata !23, metadata !24} +!46 = metadata !{metadata !"a.c", metadata !"/tmp/"} diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll index 89010dd364..038c2296cd 100644 --- a/test/CodeGen/ARM/debug-info-qreg.ll +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -36,58 +36,60 @@ declare i32 @printf(i8* nocapture, ...) nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!50 = metadata !{metadata !0, metadata !10, metadata !14} -!51 = metadata !{metadata !18} -!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29} -!53 = metadata !{metadata !30} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 786454, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] !6 = metadata !{i32 786691, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] -!7 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] -!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !13} -!13 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 786478, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] -!15 = metadata !{i32 786473, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!16 = metadata !{i32 786453, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 786478, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !17 = metadata !{null} !18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] !20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ] -!21 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] -!22 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] -!23 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] -!25 = metadata !{i32 786443, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] +!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ] !26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ] !27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] !30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ] -!31 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] -!32 = metadata !{i32 786454, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] -!33 = metadata !{i32 786455, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] !34 = metadata !{metadata !35, metadata !37} -!35 = metadata !{i32 786445, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] -!36 = metadata !{i32 786454, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] -!37 = metadata !{i32 786445, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] -!38 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] !39 = metadata !{i32 79, i32 7, metadata !40, null} -!40 = metadata !{i32 786443, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] -!41 = metadata !{i32 786443, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] -!42 = metadata !{i32 786443, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] -!43 = metadata !{i32 786443, metadata !25, i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ] !44 = metadata !{i32 75, i32 5, metadata !42, null} !45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} -!46 = metadata !{i32 786443, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] -!47 = metadata !{i32 786443, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ] !48 = metadata !{i32 95, i32 3, metadata !25, null} !49 = metadata !{i32 99, i32 3, metadata !25, null} +!50 = metadata !{metadata !0, metadata !10, metadata !14} +!51 = metadata !{metadata !18} +!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29} +!53 = metadata !{metadata !30} +!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"} +!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll index 68fabee15f..f3af0b93c6 100644 --- a/test/CodeGen/ARM/debug-info-s16-reg.ll +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -62,19 +62,15 @@ declare i32 @puts(i8* nocapture) nounwind optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!47 = metadata !{metadata !0, metadata !6, metadata !7} -!48 = metadata !{metadata !8, metadata !10, metadata !12} -!49 = metadata !{metadata !14, metadata !15, metadata !16} -!50 = metadata !{metadata !17, metadata !18, metadata !22} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, null, null, metadata !47, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] !8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !9 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !10 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -90,18 +86,18 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !20 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] !21 = metadata !{i32 786468, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !22 = metadata !{i32 786688, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0, null} ; [ DW_TAG_auto_variable ] -!23 = metadata !{i32 786443, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!23 = metadata !{i32 786443, metadata !1, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 4, i32 22, metadata !0, null} !25 = metadata !{i32 4, i32 33, metadata !0, null} !26 = metadata !{i32 4, i32 52, metadata !0, null} !27 = metadata !{i32 6, i32 3, metadata !28, null} -!28 = metadata !{i32 786443, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 7, i32 3, metadata !28, null} !30 = metadata !{i32 11, i32 42, metadata !6, null} !31 = metadata !{i32 11, i32 53, metadata !6, null} !32 = metadata !{i32 11, i32 72, metadata !6, null} !33 = metadata !{i32 13, i32 3, metadata !34, null} -!34 = metadata !{i32 786443, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!34 = metadata !{i32 786443, metadata !1, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ] !35 = metadata !{i32 14, i32 3, metadata !34, null} !36 = metadata !{i32 17, i32 15, metadata !7, null} !37 = metadata !{i32 17, i32 28, metadata !7, null} @@ -114,3 +110,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !44 = metadata !{i32 6, i32 3, metadata !28, metadata !40} !45 = metadata !{i32 22, i32 3, metadata !23, null} !46 = metadata !{i32 23, i32 1, metadata !23, null} +!47 = metadata !{metadata !0, metadata !6, metadata !7} +!48 = metadata !{metadata !8, metadata !10, metadata !12} +!49 = metadata !{metadata !14, metadata !15, metadata !16} +!50 = metadata !{metadata !17, metadata !18, metadata !22} +!51 = metadata !{metadata !"a.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll index e40d617619..ae02a245b4 100644 --- a/test/CodeGen/ARM/debug-info-sreg2.ll +++ b/test/CodeGen/ARM/debug-info-sreg2.ll @@ -40,22 +40,23 @@ declare float @_Z2f3f(float) optsize declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!16 = metadata !{metadata !1} -!17 = metadata !{metadata !5, metadata !8} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"k.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130845)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"k.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, null, null, metadata !16, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", metadata !2, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} !5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] -!6 = metadata !{i32 786443, metadata !1, i32 5, i32 12, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!6 = metadata !{i32 786443, metadata !2, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ] !7 = metadata !{i32 786468, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !8 = metadata !{i32 786688, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] -!9 = metadata !{i32 786443, metadata !10, i32 7, i32 25, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] -!10 = metadata !{i32 786443, metadata !6, i32 7, i32 3, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!9 = metadata !{i32 786443, metadata !2, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 6, i32 18, metadata !6, null} !12 = metadata !{i32 7, i32 3, metadata !6, null} !13 = metadata !{i32 8, i32 20, metadata !9, null} !14 = metadata !{i32 7, i32 20, metadata !10, null} !15 = metadata !{i32 10, i32 1, metadata !6, null} +!16 = metadata !{metadata !1} +!17 = metadata !{metadata !5, metadata !8} +!18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"} diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 48ef5ed88f..c7e2f5d094 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { @@ -18,6 +20,8 @@ entry: ; NFP0: vadd.f32 s ; CORTEXA8: test: -; CORTEXA8: vadd.f32 d +; CORTEXA8: vadd.f32 s +; CORTEXA8U: test: +; CORTEXA8U: vadd.f32 d ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}} +; CORTEXA9: vadd.f32 s diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index 1566a9272d..f5245c9463 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { @@ -18,9 +20,11 @@ entry: ; NFP0: vmul.f32 s ; CORTEXA8: test: -; CORTEXA8: vmul.f32 d +; CORTEXA8: vmul.f32 s +; CORTEXA8U: test: +; CORTEXA8U: vmul.f32 d ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} +; CORTEXA9: vmul.f32 s ; VFP2: test2 define float @test2(float %a) nounwind { diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll index 418b59803d..d84690ba4e 100644 --- a/test/CodeGen/ARM/fnegs.ll +++ b/test/CodeGen/ARM/fnegs.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=CORTEXA8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8U ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test1(float* %a) { @@ -22,7 +24,10 @@ entry: ; NFP0: vneg.f32 s{{.*}}, s{{.*}} ; CORTEXA8: test1: -; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} +; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}} + +; CORTEXA8U: test1: +; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}} ; CORTEXA9: test1: ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} @@ -46,7 +51,10 @@ entry: ; NFP0: vneg.f32 s{{.*}}, s{{.*}} ; CORTEXA8: test2: -; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} +; CORTEXA8: vneg.f32 s{{.*}}, s{{.*}} + +; CORTEXA8U: test2: +; CORTEXA8U: vneg.f32 d{{.*}}, d{{.*}} ; CORTEXA9: test2: ; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 9ce9b7ae7d..c308061734 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -1,7 +1,9 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=A8U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8U define float @t1(float %acc, float %a, float %b) nounwind { entry: @@ -11,9 +13,13 @@ entry: ; NEON: t1: ; NEON: vnmla.f32 +; A8U: t1: +; A8U: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} +; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} + ; A8: t1: ; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} -; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} +; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} %0 = fmul float %a, %b %1 = fsub float -0.0, %0 %2 = fsub float %1, %acc @@ -28,9 +34,13 @@ entry: ; NEON: t2: ; NEON: vnmla.f32 +; A8U: t2: +; A8U: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}} +; A8U: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} + ; A8: t2: ; A8: vnmul.f32 s{{[01234]}}, s{{[01234]}}, s{{[01234]}} -; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} +; A8: vsub.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} %0 = fmul float %a, %b %1 = fmul float -1.0, %0 %2 = fsub float %1, %acc @@ -45,6 +55,10 @@ entry: ; NEON: t3: ; NEON: vnmla.f64 +; A8U: t3: +; A8U: vnmul.f64 d +; A8U: vsub.f64 d + ; A8: t3: ; A8: vnmul.f64 d ; A8: vsub.f64 d @@ -62,6 +76,10 @@ entry: ; NEON: t4: ; NEON: vnmla.f64 +; A8U: t4: +; A8U: vnmul.f64 d +; A8U: vsub.f64 d + ; A8: t4: ; A8: vnmul.f64 d ; A8: vsub.f64 d diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 44298b9c5d..3c47eb580f 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NEON +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NEON ; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=VFP2 define i32 @test1(float %a, float %b) { diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll index f039e74c8e..617b01881a 100644 --- a/test/CodeGen/ARM/fsubs.ll +++ b/test/CodeGen/ARM/fsubs.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=NFP1U +; RUN: llc < %s -mtriple=arm-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=NFP1U ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NFP0 define float @test(float %a, float %b) { @@ -9,5 +11,6 @@ entry: } ; VFP2: vsub.f32 s -; NFP1: vsub.f32 d +; NFP1U: vsub.f32 d +; NFP1: vsub.f32 s ; NFP0: vsub.f32 s diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll index 1732df3fa5..f88e927961 100644 --- a/test/CodeGen/ARM/global-merge.ll +++ b/test/CodeGen/ARM/global-merge.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumb-apple-darwin -global-merge-on-const=true | FileCheck %s ; Test the ARMGlobalMerge pass. Use -march=thumb because it has a small ; value for the maximum offset (127). @@ -6,6 +6,52 @@ ; CHECK: g0: @g0 = internal global [32 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2 ] +; Global variables marked with "used" attribute must be kept +; CHECK: g8 +@g8 = internal global i32 0 +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* @g8 to i8*)], section "llvm.metadata" + +; Global used in landing pad instruction must be kept +; CHECK: ZTIi +@_ZTIi = internal global i8* null + +define i32 @_Z9exceptioni(i32 %arg) { +bb: + %tmp = invoke i32 @_Z14throwSomethingi(i32 %arg) + to label %bb9 unwind label %bb1 + +bb1: ; preds = %bb + %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*) + catch i8* bitcast (i8** @_ZTIi to i8*) + %tmp3 = extractvalue { i8*, i32 } %tmp2, 1 + %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) + %tmp5 = icmp eq i32 %tmp3, %tmp4 + br i1 %tmp5, label %bb6, label %bb10 + +bb6: ; preds = %bb1 + %tmp7 = extractvalue { i8*, i32 } %tmp2, 0 + %tmp8 = tail call i8* @__cxa_begin_catch(i8* %tmp7) + tail call void @__cxa_end_catch() + br label %bb9 + +bb9: ; preds = %bb6, %bb + %res.0 = phi i32 [ 0, %bb6 ], [ %tmp, %bb ] + ret i32 %res.0 + +bb10: ; preds = %bb1 + resume { i8*, i32 } %tmp2 +} + +declare i32 @_Z14throwSomethingi(i32) + +declare i32 @__gxx_personality_sj0(...) + +declare i32 @llvm.eh.typeid.for(i8*) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + ; CHECK: _MergedGlobals: @g1 = internal global i32 1 @g2 = internal global i32 2 @@ -21,3 +67,8 @@ ; CHECK: _MergedGlobals2 @g4 = internal global i32 0 @g5 = internal global i32 0 + +; Global variables that are constant can be merged together +; CHECK: _MergedGlobals3 +@g6 = internal constant [12 x i32] zeroinitializer, align 4 +@g7 = internal constant [12 x i32] zeroinitializer, align 4 diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg index cb77b09ef4..4d75f581a1 100644 --- a/test/CodeGen/ARM/lit.local.cfg +++ b/test/CodeGen/ARM/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll index 5283f5747d..248c4bd1be 100644 --- a/test/CodeGen/ARM/lsr-icmp-imm.ll +++ b/test/CodeGen/ARM/lsr-icmp-imm.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s -; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios -disable-block-placement < %s | FileCheck %s +; RUN: llc -mtriple=armv7-apple-ios -disable-block-placement < %s | FileCheck %s ; LSR should compare against the post-incremented induction variable. ; In this case, the immediate value is -2 which requires a cmn instruction. diff --git a/test/CodeGen/ARM/neon-spfp.ll b/test/CodeGen/ARM/neon-spfp.ll new file mode 100644 index 0000000000..c00f0d17c9 --- /dev/null +++ b/test/CodeGen/ARM/neon-spfp.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 | FileCheck %s -check-prefix=LINUXA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=LINUXA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 | FileCheck %s -check-prefix=LINUXA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 | FileCheck %s -check-prefix=LINUXA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift | FileCheck %s -check-prefix=LINUXSWIFT + +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a5 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA5 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a8 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA8 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a9 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA9 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=cortex-a15 --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFEA15 +; RUN: llc < %s -mtriple armv7a-none-linux-gnueabihf -mcpu=swift --enable-unsafe-fp-math | FileCheck %s -check-prefix=UNSAFESWIFT + +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a5 | FileCheck %s -check-prefix=DARWINA5 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=DARWINA8 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a9 | FileCheck %s -check-prefix=DARWINA9 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=cortex-a15 | FileCheck %s -check-prefix=DARWINA15 +; RUN: llc < %s -mtriple armv7a-none-darwin -mcpu=swift | FileCheck %s -check-prefix=DARWINSWIFT + +; This test makes sure we're not lowering VMUL.f32 D* (aka. NEON) for single-prec. FP ops, since +; NEON is not fully IEEE 754 compliant, unless unsafe-math is selected. + +@.str = private unnamed_addr constant [12 x i8] c"S317\09%.5g \0A\00", align 1 + +; CHECK-LINUXA5: main: +; CHECK-LINUXA8: main: +; CHECK-LINUXA9: main: +; CHECK-LINUXA15: main: +; CHECK-LINUXSWIFT: main: +; CHECK-UNSAFEA5: main: +; CHECK-UNSAFEA8: main: +; CHECK-UNSAFEA9: main: +; CHECK-UNSAFEA15: main: +; CHECK-UNSAFESWIFT: main: +; CHECK-DARWINA5: main: +; CHECK-DARWINA8: main: +; CHECK-DARWINA9: main: +; CHECK-DARWINA15: main: +; CHECK-DARWINSWIFT: main: +define i32 @main() { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.04 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %q.03 = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ] + %mul = fmul float %q.03, 0x3FEFAE1480000000 +; CHECK-LINUXA5: vmul.f32 s{{[0-9]*}} +; CHECK-LINUXA8: vmul.f32 s{{[0-9]*}} +; CHECK-LINUXA9: vmul.f32 s{{[0-9]*}} +; CHECK-LINUXA15: vmul.f32 s{{[0-9]*}} +; Swift is *always* unsafe +; CHECK-LINUXSWIFT: vmul.f32 d{{[0-9]*}} + +; CHECK-UNSAFEA5: vmul.f32 d{{[0-9]*}} +; CHECK-UNSAFEA8: vmul.f32 d{{[0-9]*}} +; A9 and A15 don't need this +; CHECK-UNSAFEA9: vmul.f32 s{{[0-9]*}} +; CHECK-UNSAFEA15: vmul.f32 s{{[0-9]*}} +; CHECK-UNSAFESWIFT: vmul.f32 d{{[0-9]*}} + +; CHECK-DARWINA5: vmul.f32 d{{[0-9]*}} +; CHECK-DARWINA8: vmul.f32 d{{[0-9]*}} +; CHECK-DARWINA9: vmul.f32 s{{[0-9]*}} +; CHECK-DARWINA15: vmul.f32 s{{[0-9]*}} +; CHECK-DARWINSWIFT: vmul.f32 d{{[0-9]*}} + %conv = fpext float %mul to double + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %conv) #1 + %inc = add nsw i32 %i.04, 1 + %exitcond = icmp eq i32 %inc, 16000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll index d301c6a4ca..0a7c8b2b6a 100644 --- a/test/CodeGen/ARM/neon_minmax.ll +++ b/test/CodeGen/ARM/neon_minmax.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=swift | FileCheck %s define float @fmin_ole(float %x) nounwind { ;CHECK: fmin_ole: diff --git a/test/CodeGen/ARM/vcvt.ll b/test/CodeGen/ARM/vcvt.ll index c078f49309..e67b4788a3 100644 --- a/test/CodeGen/ARM/vcvt.ll +++ b/test/CodeGen/ARM/vcvt.ll @@ -156,3 +156,175 @@ define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind { declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone + +; We currently estimate the cost of sext/zext/trunc v8(v16)i32 <-> v8(v16)i8 +; instructions as expensive. If lowering is improved the cost model needs to +; change. +; RUN: opt < %s -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -march=arm -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST +%T0_5 = type <8 x i8> +%T1_5 = type <8 x i32> +; CHECK: func_cvt5: +define void @func_cvt5(%T0_5* %loadaddr, %T1_5* %storeaddr) { +; CHECK: vmovl.s8 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 + %v0 = load %T0_5* %loadaddr +; COST: func_cvt5 +; COST: cost of 3 {{.*}} sext + %r = sext %T0_5 %v0 to %T1_5 + store %T1_5 %r, %T1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TA0_5 = type <8 x i8> +%TA1_5 = type <8 x i32> +; CHECK: func_cvt1: +define void @func_cvt1(%TA0_5* %loadaddr, %TA1_5* %storeaddr) { +; CHECK: vmovl.u8 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 + %v0 = load %TA0_5* %loadaddr +; COST: func_cvt1 +; COST: cost of 3 {{.*}} zext + %r = zext %TA0_5 %v0 to %TA1_5 + store %TA1_5 %r, %TA1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%T0_51 = type <8 x i32> +%T1_51 = type <8 x i8> +; CHECK: func_cvt51: +define void @func_cvt51(%T0_51* %loadaddr, %T1_51* %storeaddr) { +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb + %v0 = load %T0_51* %loadaddr +; COST: func_cvt51 +; COST: cost of 19 {{.*}} trunc + %r = trunc %T0_51 %v0 to %T1_51 + store %T1_51 %r, %T1_51* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TT0_5 = type <16 x i8> +%TT1_5 = type <16 x i32> +; CHECK: func_cvt52: +define void @func_cvt52(%TT0_5* %loadaddr, %TT1_5* %storeaddr) { +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 +; CHECK: vmovl.s16 + %v0 = load %TT0_5* %loadaddr +; COST: func_cvt52 +; COST: cost of 6 {{.*}} sext + %r = sext %TT0_5 %v0 to %TT1_5 + store %TT1_5 %r, %TT1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TTA0_5 = type <16 x i8> +%TTA1_5 = type <16 x i32> +; CHECK: func_cvt12: +define void @func_cvt12(%TTA0_5* %loadaddr, %TTA1_5* %storeaddr) { +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 +; CHECK: vmovl.u16 + %v0 = load %TTA0_5* %loadaddr +; COST: func_cvt12 +; COST: cost of 6 {{.*}} zext + %r = zext %TTA0_5 %v0 to %TTA1_5 + store %TTA1_5 %r, %TTA1_5* %storeaddr + ret void +} +;; We currently estimate the cost of this instruction as expensive. If lowering +;; is improved the cost needs to change. +%TT0_51 = type <16 x i32> +%TT1_51 = type <16 x i8> +; CHECK: func_cvt512: +define void @func_cvt512(%TT0_51* %loadaddr, %TT1_51* %storeaddr) { +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb + %v0 = load %TT0_51* %loadaddr +; COST: func_cvt512 +; COST: cost of 38 {{.*}} trunc + %r = trunc %TT0_51 %v0 to %TT1_51 + store %TT1_51 %r, %TT1_51* %storeaddr + ret void +} + +; CHECK: sext_v4i16_v4i64: +define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 + %v0 = load <4 x i16>* %loadaddr +; COST: sext_v4i16_v4i64 +; COST: cost of 3 {{.*}} sext + %r = sext <4 x i16> %v0 to <4 x i64> + store <4 x i64> %r, <4 x i64>* %storeaddr + ret void +} + +; CHECK: zext_v4i16_v4i64: +define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) { +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 + %v0 = load <4 x i16>* %loadaddr +; COST: zext_v4i16_v4i64 +; COST: cost of 3 {{.*}} zext + %r = zext <4 x i16> %v0 to <4 x i64> + store <4 x i64> %r, <4 x i64>* %storeaddr + ret void +} + +; CHECK: sext_v8i16_v8i64: +define void @sext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 +; CHECK: vmovl.s32 + %v0 = load <8 x i16>* %loadaddr +; COST: sext_v8i16_v8i64 +; COST: cost of 6 {{.*}} sext + %r = sext <8 x i16> %v0 to <8 x i64> + store <8 x i64> %r, <8 x i64>* %storeaddr + ret void +} + +; CHECK: zext_v8i16_v8i64: +define void @zext_v8i16_v8i64(<8 x i16>* %loadaddr, <8 x i64>* %storeaddr) { +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 +; CHECK: vmovl.u32 + %v0 = load <8 x i16>* %loadaddr +; COST: zext_v8i16_v8i64 +; COST: cost of 6 {{.*}} zext + %r = zext <8 x i16> %v0 to <8 x i64> + store <8 x i64> %r, <8 x i64>* %storeaddr + ret void +} + diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll index f5994046de..7e79d6c68c 100644 --- a/test/CodeGen/ARM/vselect_imax.ll +++ b/test/CodeGen/ARM/vselect_imax.ll @@ -10,3 +10,114 @@ define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) { ret void } +; We adjusted the cost model of the following selects. When we improve code +; lowering we also need to adjust the cost. +%T0_10 = type <16 x i16> +%T1_10 = type <16 x i1> +; CHECK: func_blend10: +define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2, + %T1_10* %blend, %T0_10* %storeaddr) { + %v0 = load %T0_10* %loadaddr + %v1 = load %T0_10* %loadaddr2 + %c = icmp slt %T0_10 %v0, %v1 +; CHECK: vst1 +; CHECK: vst1 +; CHECK: vst1 +; CHECK: vst1 +; CHECK: vld +; COST: func_blend10 +; COST: cost of 40 {{.*}} select + %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1 + store %T0_10 %r, %T0_10* %storeaddr + ret void +} +%T0_14 = type <8 x i32> +%T1_14 = type <8 x i1> +; CHECK: func_blend14: +define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2, + %T1_14* %blend, %T0_14* %storeaddr) { + %v0 = load %T0_14* %loadaddr + %v1 = load %T0_14* %loadaddr2 + %c = icmp slt %T0_14 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend14 +; COST: cost of 41 {{.*}} select + %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1 + store %T0_14 %r, %T0_14* %storeaddr + ret void +} +%T0_15 = type <16 x i32> +%T1_15 = type <16 x i1> +; CHECK: func_blend15: +define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2, + %T1_15* %blend, %T0_15* %storeaddr) { + %v0 = load %T0_15* %loadaddr + %v1 = load %T0_15* %loadaddr2 + %c = icmp slt %T0_15 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend15 +; COST: cost of 82 {{.*}} select + %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1 + store %T0_15 %r, %T0_15* %storeaddr + ret void +} +%T0_18 = type <4 x i64> +%T1_18 = type <4 x i1> +; CHECK: func_blend18: +define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2, + %T1_18* %blend, %T0_18* %storeaddr) { + %v0 = load %T0_18* %loadaddr + %v1 = load %T0_18* %loadaddr2 + %c = icmp slt %T0_18 %v0, %v1 +; CHECK: strh +; CHECK: strh +; CHECK: strh +; CHECK: strh +; COST: func_blend18 +; COST: cost of 19 {{.*}} select + %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1 + store %T0_18 %r, %T0_18* %storeaddr + ret void +} +%T0_19 = type <8 x i64> +%T1_19 = type <8 x i1> +; CHECK: func_blend19: +define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2, + %T1_19* %blend, %T0_19* %storeaddr) { + %v0 = load %T0_19* %loadaddr + %v1 = load %T0_19* %loadaddr2 + %c = icmp slt %T0_19 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend19 +; COST: cost of 50 {{.*}} select + %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1 + store %T0_19 %r, %T0_19* %storeaddr + ret void +} +%T0_20 = type <16 x i64> +%T1_20 = type <16 x i1> +; CHECK: func_blend20: +define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2, + %T1_20* %blend, %T0_20* %storeaddr) { + %v0 = load %T0_20* %loadaddr + %v1 = load %T0_20* %loadaddr2 + %c = icmp slt %T0_20 %v0, %v1 +; CHECK: strb +; CHECK: strb +; CHECK: strb +; CHECK: strb +; COST: func_blend20 +; COST: cost of 100 {{.*}} select + %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1 + store %T0_20 %r, %T0_20* %storeaddr + ret void +} diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll index 679e3f4347..1efbc73650 100644 --- a/test/CodeGen/ARM/widen-vmovs.ll +++ b/test/CodeGen/ARM/widen-vmovs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-code-place | FileCheck %s +; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s target triple = "thumbv7-apple-ios" ; The 1.0e+10 constant is loaded from the constant pool and kept in a register. diff --git a/test/CodeGen/ARM/zextload_demandedbits.ll b/test/CodeGen/ARM/zextload_demandedbits.ll new file mode 100644 index 0000000000..3d3269cae2 --- /dev/null +++ b/test/CodeGen/ARM/zextload_demandedbits.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=arm -mtriple="thumbv7-apple-ios3.0.0" | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" + +%struct.eggs = type { %struct.spam, i16 } +%struct.spam = type { [3 x i32] } +%struct.barney = type { [2 x i32], [2 x i32] } + +; Make sure that the sext op does not get lost due to ComputeMaskedBits. +; CHECK: quux +; CHECK: lsl +; CHECK: asr +; CHECK: bl +; CHECK: pop +define void @quux(%struct.eggs* %arg) { +bb: + %tmp1 = getelementptr inbounds %struct.eggs* %arg, i32 0, i32 1 + %0 = load i16* %tmp1, align 2 + %tobool = icmp eq i16 %0, 0 + br i1 %tobool, label %bb16, label %bb3 + +bb3: ; preds = %bb + %tmp4 = bitcast i16* %tmp1 to i8* + %tmp5 = ptrtoint i16* %tmp1 to i32 + %tmp6 = shl i32 %tmp5, 20 + %tmp7 = ashr exact i32 %tmp6, 20 + %tmp14 = getelementptr inbounds %struct.barney* undef, i32 %tmp7 + %tmp15 = tail call i32 @widget(%struct.barney* %tmp14, i8* %tmp4, i32 %tmp7) + br label %bb16 + +bb16: ; preds = %bb3, %bb + ret void +} + +declare i32 @widget(%struct.barney*, i8*, i32) diff --git a/test/CodeGen/CPP/2007-06-16-Funcname.ll b/test/CodeGen/CPP/2007-06-16-Funcname.ll index 16e9798481..71fea12d9c 100644 --- a/test/CodeGen/CPP/2007-06-16-Funcname.ll +++ b/test/CodeGen/CPP/2007-06-16-Funcname.ll @@ -5,4 +5,3 @@ define void @foo() { ret void } - diff --git a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll index da1aeb556a..7ffb734c71 100644 --- a/test/CodeGen/Generic/2008-02-20-MatchingMem.ll +++ b/test/CodeGen/Generic/2008-02-20-MatchingMem.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s ; PR1133 +; XFAIL: hexagon define void @test(i32* %X) nounwind { entry: %tmp1 = getelementptr i32* %X, i32 10 ; <i32*> [#uses=2] diff --git a/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll new file mode 100644 index 0000000000..a1aed0e3a4 --- /dev/null +++ b/test/CodeGen/Generic/2013-03-20-APFloatCrash.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s + +define internal i1 @f(float %s) { +entry: + %c = fcmp ogt float %s, 0x41EFFFFFE0000000 + ret i1 %c +} diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll index e709080bfc..a135c625fc 100644 --- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll +++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s - +; XFAIL: hexagon declare { i64, double } @wild() define void @foo(i64* %p, double* %q) nounwind { diff --git a/test/CodeGen/Generic/select-cc.ll b/test/CodeGen/Generic/select-cc.ll index b653e2a46d..7510f701b1 100644 --- a/test/CodeGen/Generic/select-cc.ll +++ b/test/CodeGen/Generic/select-cc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s ; PR2504 - +; XFAIL: hexagon define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind { %x.lo = extractelement <2 x double> %x, i32 0 ; <double> [#uses=1] %x.lo.ge = fcmp oge double %x.lo, 0.000000e+00 ; <i1> [#uses=1] diff --git a/test/CodeGen/Generic/vector.ll b/test/CodeGen/Generic/vector.ll index 84814a1c12..bc7c7d00a1 100644 --- a/test/CodeGen/Generic/vector.ll +++ b/test/CodeGen/Generic/vector.ll @@ -1,6 +1,6 @@ ; Test that vectors are scalarized/lowered correctly. ; RUN: llc < %s - +; XFAIL: hexagon %d8 = type <8 x double> %f1 = type <1 x float> diff --git a/test/CodeGen/Hexagon/ashift-left-right.ll b/test/CodeGen/Hexagon/ashift-left-right.ll new file mode 100644 index 0000000000..7c41bc7bbf --- /dev/null +++ b/test/CodeGen/Hexagon/ashift-left-right.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s + +define i32 @foo(i32 %a, i32 %b) nounwind readnone { +; CHECK: lsl +; CHECK: aslh +entry: + %shl1 = shl i32 16, %a + %shl2 = shl i32 %b, 16 + %ret = mul i32 %shl1, %shl2 + ret i32 %ret +} + +define i32 @bar(i32 %a, i32 %b) nounwind readnone { +; CHECK: asrh +; CHECK: lsr +entry: + %shl1 = ashr i32 16, %a + %shl2 = ashr i32 %b, 16 + %ret = mul i32 %shl1, %shl2 + ret i32 %ret +} diff --git a/test/CodeGen/Hexagon/gp-rel.ll b/test/CodeGen/Hexagon/gp-rel.ll new file mode 100644 index 0000000000..561869e8ef --- /dev/null +++ b/test/CodeGen/Hexagon/gp-rel.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s +; Check that gp-relative instructions are being generated. + +@a = common global i32 0, align 4 +@b = common global i32 0, align 4 +@c = common global i32 0, align 4 + +define i32 @foo(i32 %p) #0 { +entry: +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#a) +; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#b) +; CHECK: if{{ *}}(p{{[0-3]}}) memw(##c){{ *}}={{ *}}r{{[0-9]+}} + %0 = load i32* @a, align 4 + %1 = load i32* @b, align 4 + %add = add nsw i32 %1, %0 + %cmp = icmp eq i32 %0, %1 + br i1 %cmp, label %if.then, label %entry.if.end_crit_edge + +entry.if.end_crit_edge: + %.pre = load i32* @c, align 4 + br label %if.end + +if.then: + %add1 = add nsw i32 %add, %0 + store i32 %add1, i32* @c, align 4 + br label %if.end + +if.end: + %2 = phi i32 [ %.pre, %entry.if.end_crit_edge ], [ %add1, %if.then ] + %cmp2 = icmp eq i32 %add, %2 + %sel1 = select i1 %cmp2, i32 %2, i32 %1 + ret i32 %sel1 +} diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll index eaffa0797a..c2e8153b7d 100644 --- a/test/CodeGen/Hexagon/hwloop-dbg.ll +++ b/test/CodeGen/Hexagon/hwloop-dbg.ll @@ -33,7 +33,6 @@ for.end: ; preds = %for.body declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone -!llvm.dbg.cu = !{!0} !0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99] !1 = metadata !{metadata !2} diff --git a/test/CodeGen/Hexagon/memops.ll b/test/CodeGen/Hexagon/memops.ll new file mode 100644 index 0000000000..5498848d85 --- /dev/null +++ b/test/CodeGen/Hexagon/memops.ll @@ -0,0 +1,1369 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + +define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i8* %p, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_sub(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add5_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_sub_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_or_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_and_index(i8* nocapture %p, i32 %i, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_setbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add5_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_add_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_sub_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv1 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_or_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_and_index5(i8* nocapture %p, i8 zeroext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_clrbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %and = and i32 %conv, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_char_setbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i8* %p, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_sub(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %0 = load i8* %p, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i8* %p, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %and = and i32 %conv2, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_setbit(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i8* %p, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %p, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add5_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_sub_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_or_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_and_index(i8* nocapture %p, i32 %i, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_clrbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %and = and i32 %conv2, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_setbit_index(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 %i + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add5_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_add_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_sub_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i8 %x to i32 + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv13 = zext i8 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_or_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %or3 = or i8 %0, %x + store i8 %or3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_and_index5(i8* nocapture %p, i8 signext %x) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %and3 = and i8 %0, %x + store i8 %and3, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_clrbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %and = and i32 %conv2, 223 + %conv1 = trunc i32 %and to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_signed_char_setbit_index5(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i8* %p, i32 5 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv2 = zext i8 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i16* %p, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_sub(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %and = and i32 %conv, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add5_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_sub_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_or_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_and_index(i16* nocapture %p, i32 %i, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %and = and i32 %conv, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_setbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add5_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %add = add nsw i32 %conv, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_add_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_sub_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}} + %conv = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv1 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv1, %conv + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_or_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_and_index5(i16* nocapture %p, i16 zeroext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_clrbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %and = and i32 %conv, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_unsigned_short_setbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv = zext i16 %0 to i32 + %or = or i32 %conv, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i16* %p, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_sub(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %0 = load i16* %p, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i16* %p, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %conv2, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_setbit(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i16* %p, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %p, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add5_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_sub_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_or_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_and_index(i16* nocapture %p, i32 %i, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_clrbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %conv2, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_setbit_index(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 %i + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add5_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %add = add nsw i32 %conv2, 5 + %conv1 = trunc i32 %add to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_add_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %add = add nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %add to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_sub_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}} + %conv4 = zext i16 %x to i32 + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv13 = zext i16 %0 to i32 + %sub = sub nsw i32 %conv13, %conv4 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_or_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %or3 = or i16 %0, %x + store i16 %or3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_and_index5(i16* nocapture %p, i16 signext %x) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %and3 = and i16 %0, %x + store i16 %and3, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_clrbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %and = and i32 %conv2, 65503 + %conv1 = trunc i32 %and to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_short_setbit_index5(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i16* %p, i32 5 + %0 = load i16* %add.ptr, align 2, !tbaa !2 + %conv2 = zext i16 %0 to i32 + %or = or i32 %conv2, 128 + %conv1 = trunc i32 %or to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !2 + ret void +} + +define void @memop_signed_int_add5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i32* %p, align 4, !tbaa !3 + %add = add i32 %0, 5 + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %add = add i32 %0, %x + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %sub = sub i32 %0, %x + store i32 %sub, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_setbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add5_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_setbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add5_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_add_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_sub_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_or_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_and_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_clrbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_signed_int_setbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %0 = load i32* %p, align 4, !tbaa !3 + %add = add nsw i32 %0, 5 + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %add = add nsw i32 %0, %x + store i32 %add, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %sub = sub nsw i32 %0, %x + store i32 %sub, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %0 = load i32* %p, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %p, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add5_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_sub_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub nsw i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_or_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_and_index(i32* nocapture %p, i32 %i, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_clrbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_setbit_index(i32* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 %i + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add5_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5 + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, 5 + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_add_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %add = add nsw i32 %0, %x + store i32 %add, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_sub_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %sub = sub nsw i32 %0, %x + store i32 %sub, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_or_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, %x + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_and_index5(i32* nocapture %p, i32 %x) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}} + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, %x + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_clrbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %and = and i32 %0, -33 + store i32 %and, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +define void @memop_unsigned_int_setbit_index5(i32* nocapture %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}}) + %add.ptr = getelementptr inbounds i32* %p, i32 5 + %0 = load i32* %add.ptr, align 4, !tbaa !3 + %or = or i32 %0, 128 + store i32 %or, i32* %add.ptr, align 4, !tbaa !3 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} +!2 = metadata !{metadata !"short", metadata !0} +!3 = metadata !{metadata !"int", metadata !0} diff --git a/test/CodeGen/Hexagon/memops1.ll b/test/CodeGen/Hexagon/memops1.ll new file mode 100644 index 0000000000..2babdc848d --- /dev/null +++ b/test/CodeGen/Hexagon/memops1.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + + +define void @f(i32* %p) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1 + %p.addr = alloca i32*, align 4 + store i32* %p, i32** %p.addr, align 4 + %0 = load i32** %p.addr, align 4 + %add.ptr = getelementptr inbounds i32* %0, i32 10 + %1 = load i32* %add.ptr, align 4 + %sub = sub nsw i32 %1, 1 + store i32 %sub, i32* %add.ptr, align 4 + ret void +} + +define void @g(i32* %p, i32 %i) nounwind { +entry: +; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1 + %p.addr = alloca i32*, align 4 + %i.addr = alloca i32, align 4 + store i32* %p, i32** %p.addr, align 4 + store i32 %i, i32* %i.addr, align 4 + %0 = load i32** %p.addr, align 4 + %1 = load i32* %i.addr, align 4 + %add.ptr = getelementptr inbounds i32* %0, i32 %1 + %add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 10 + %2 = load i32* %add.ptr1, align 4 + %sub = sub nsw i32 %2, 1 + store i32 %sub, i32* %add.ptr1, align 4 + ret void +} diff --git a/test/CodeGen/Hexagon/memops2.ll b/test/CodeGen/Hexagon/memops2.ll new file mode 100644 index 0000000000..b1b25445c0 --- /dev/null +++ b/test/CodeGen/Hexagon/memops2.ll @@ -0,0 +1,32 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + + +define void @f(i16* nocapture %p) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1 + %add.ptr = getelementptr inbounds i16* %p, i32 10 + %0 = load i16* %add.ptr, align 2, !tbaa !0 + %conv2 = zext i16 %0 to i32 + %sub = add nsw i32 %conv2, 65535 + %conv1 = trunc i32 %sub to i16 + store i16 %conv1, i16* %add.ptr, align 2, !tbaa !0 + ret void +} + +define void @g(i16* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1 + %add.ptr.sum = add i32 %i, 10 + %add.ptr1 = getelementptr inbounds i16* %p, i32 %add.ptr.sum + %0 = load i16* %add.ptr1, align 2, !tbaa !0 + %conv3 = zext i16 %0 to i32 + %sub = add nsw i32 %conv3, 65535 + %conv2 = trunc i32 %sub to i16 + store i16 %conv2, i16* %add.ptr1, align 2, !tbaa !0 + ret void +} + +!0 = metadata !{metadata !"short", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/memops3.ll b/test/CodeGen/Hexagon/memops3.ll new file mode 100644 index 0000000000..5b8bd6c87b --- /dev/null +++ b/test/CodeGen/Hexagon/memops3.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; Generate MemOps for V4 and above. + + +define void @f(i8* nocapture %p) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1 + %add.ptr = getelementptr inbounds i8* %p, i32 10 + %0 = load i8* %add.ptr, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %sub = add nsw i32 %conv, 255 + %conv1 = trunc i32 %sub to i8 + store i8 %conv1, i8* %add.ptr, align 1, !tbaa !0 + ret void +} + +define void @g(i8* nocapture %p, i32 %i) nounwind { +entry: +; CHECK: memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1 + %add.ptr.sum = add i32 %i, 10 + %add.ptr1 = getelementptr inbounds i8* %p, i32 %add.ptr.sum + %0 = load i8* %add.ptr1, align 1, !tbaa !0 + %conv = zext i8 %0 to i32 + %sub = add nsw i32 %conv, 255 + %conv2 = trunc i32 %sub to i8 + store i8 %conv2, i8* %add.ptr1, align 1, !tbaa !0 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/Hexagon/misaligned-access.ll b/test/CodeGen/Hexagon/misaligned-access.ll new file mode 100644 index 0000000000..4dafb44cc3 --- /dev/null +++ b/test/CodeGen/Hexagon/misaligned-access.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s +; Check that the mis-aligned load doesn't cause compiler to assert. + +declare i32 @_hi(i64) #1 +@temp1 = common global i32 0, align 4 + +define i32 @CSDRSEARCH_executeSearchManager() #0 { +entry: + %temp = alloca i32, align 4 + %0 = load i32* @temp1, align 4 + store i32 %0, i32* %temp, align 4 + %1 = bitcast i32* %temp to i64* + %2 = load i64* %1, align 8 + %call = call i32 @_hi(i64 %2) + ret i32 %call +} diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll index 3f244f6891..d5162b964a 100644 --- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll +++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll @@ -12,11 +12,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!11 = metadata !{metadata !0} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -25,3 +24,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !8 = metadata !{i32 786443, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] !9 = metadata !{i32 3, i32 11, metadata !8, null} !10 = metadata !{i32 4, i32 2, metadata !8, null} +!11 = metadata !{metadata !0} +!12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"} diff --git a/test/CodeGen/MBlaze/DbgValueOtherTargets.test b/test/CodeGen/MBlaze/DbgValueOtherTargets.test index 4032d7b81c..8b850f5110 100644 --- a/test/CodeGen/MBlaze/DbgValueOtherTargets.test +++ b/test/CodeGen/MBlaze/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=mblaze -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg index e236200d75..ff4928de4b 100644 --- a/test/CodeGen/MBlaze/lit.local.cfg +++ b/test/CodeGen/MBlaze/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'MBlaze' in targets: diff --git a/test/CodeGen/MSP430/DbgValueOtherTargets.test b/test/CodeGen/MSP430/DbgValueOtherTargets.test index f8c747ec92..7adfbcafa3 100644 --- a/test/CodeGen/MSP430/DbgValueOtherTargets.test +++ b/test/CodeGen/MSP430/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=msp430 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=msp430 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg index 972732ebad..0ca9fc9c69 100644 --- a/test/CodeGen/MSP430/lit.local.cfg +++ b/test/CodeGen/MSP430/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'MSP430' in targets: diff --git a/test/CodeGen/Mips/DbgValueOtherTargets.test b/test/CodeGen/Mips/DbgValueOtherTargets.test index 9c351ace68..da20e7ef52 100644 --- a/test/CodeGen/Mips/DbgValueOtherTargets.test +++ b/test/CodeGen/Mips/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=mips -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=mips -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll new file mode 100644 index 0000000000..0752f69c3e --- /dev/null +++ b/test/CodeGen/Mips/dsp-patterns.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s + +; CHECK: test_lbux: +; CHECK: lbux ${{[0-9]+}} + +define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) { +entry: + %add.ptr = getelementptr inbounds i8* %b, i32 %i + %0 = load i8* %add.ptr, align 1 + ret i8 %0 +} + +; CHECK: test_lhx: +; CHECK: lhx ${{[0-9]+}} + +define signext i16 @test_lhx(i16* nocapture %b, i32 %i) { +entry: + %add.ptr = getelementptr inbounds i16* %b, i32 %i + %0 = load i16* %add.ptr, align 2 + ret i16 %0 +} + +; CHECK: test_lwx: +; CHECK: lwx ${{[0-9]+}} + +define i32 @test_lwx(i32* nocapture %b, i32 %i) { +entry: + %add.ptr = getelementptr inbounds i32* %b, i32 %i + %0 = load i32* %add.ptr, align 4 + ret i32 %0 +} diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg index 0587d3243e..e157c540b5 100644 --- a/test/CodeGen/Mips/lit.local.cfg +++ b/test/CodeGen/Mips/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'Mips' in targets: diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll index 0aeabb30e2..0dbb2c27b8 100644 --- a/test/CodeGen/Mips/madd-msub.ll +++ b/test/CodeGen/Mips/madd-msub.ll @@ -1,6 +1,9 @@ -; RUN: llc -march=mips < %s | FileCheck %s +; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=32 +; RUN: llc -march=mips -mattr=dsp < %s | FileCheck %s -check-prefix=DSP +; RUN: llc -march=mips -mcpu=mips16 < %s -; CHECK: madd +; 32: madd ${{[0-9]+}} +; DSP: madd $ac define i64 @madd1(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = sext i32 %a to i64 @@ -11,7 +14,8 @@ entry: ret i64 %add } -; CHECK: maddu +; 32: maddu ${{[0-9]+}} +; DSP: maddu $ac define i64 @madd2(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = zext i32 %a to i64 @@ -22,7 +26,8 @@ entry: ret i64 %add } -; CHECK: madd +; 32: madd ${{[0-9]+}} +; DSP: madd $ac define i64 @madd3(i32 %a, i32 %b, i64 %c) nounwind readnone { entry: %conv = sext i32 %a to i64 @@ -32,7 +37,8 @@ entry: ret i64 %add } -; CHECK: msub +; 32: msub ${{[0-9]+}} +; DSP: msub $ac define i64 @msub1(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = sext i32 %c to i64 @@ -43,7 +49,8 @@ entry: ret i64 %sub } -; CHECK: msubu +; 32: msubu ${{[0-9]+}} +; DSP: msubu $ac define i64 @msub2(i32 %a, i32 %b, i32 %c) nounwind readnone { entry: %conv = zext i32 %c to i64 @@ -54,7 +61,8 @@ entry: ret i64 %sub } -; CHECK: msub +; 32: msub ${{[0-9]+}} +; DSP: msub $ac define i64 @msub3(i32 %a, i32 %b, i64 %c) nounwind readnone { entry: %conv = sext i32 %a to i64 diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll index d93f688ef1..39d52d3826 100644 --- a/test/CodeGen/NVPTX/annotations.ll +++ b/test/CodeGen/NVPTX/annotations.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll b/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll deleted file mode 100644 index 73c77f56bc..0000000000 --- a/test/CodeGen/NVPTX/arithmetic-fp-sm10.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - -;; These tests should run for all targets - -;;===-- Basic instruction selection tests ---------------------------------===;; - - -;;; f64 - -define double @fadd_f64(double %a, double %b) { -; CHECK: add.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fadd double %a, %b - ret double %ret -} - -define double @fsub_f64(double %a, double %b) { -; CHECK: sub.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fsub double %a, %b - ret double %ret -} - -define double @fmul_f64(double %a, double %b) { -; CHECK: mul.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fmul double %a, %b - ret double %ret -} - -define double @fdiv_f64(double %a, double %b) { -; CHECK: div.rn.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}}, %fl{{[0-9]+}} -; CHECK: ret - %ret = fdiv double %a, %b - ret double %ret -} - -;; PTX does not have a floating-point rem instruction - - -;;; f32 - -define float @fadd_f32(float %a, float %b) { -; CHECK: add.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fadd float %a, %b - ret float %ret -} - -define float @fsub_f32(float %a, float %b) { -; CHECK: sub.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fsub float %a, %b - ret float %ret -} - -define float @fmul_f32(float %a, float %b) { -; CHECK: mul.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fmul float %a, %b - ret float %ret -} - -define float @fdiv_f32(float %a, float %b) { -; CHECK: div.full.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}} -; CHECK: ret - %ret = fdiv float %a, %b - ret float %ret -} - -;; PTX does not have a floating-point rem instruction diff --git a/test/CodeGen/NVPTX/arithmetic-int.ll b/test/CodeGen/NVPTX/arithmetic-int.ll index 529f84900a..8d73b7e6c4 100644 --- a/test/CodeGen/NVPTX/arithmetic-int.ll +++ b/test/CodeGen/NVPTX/arithmetic-int.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll index 968203e5f7..190a1462ad 100644 --- a/test/CodeGen/NVPTX/calling-conv.ll +++ b/test/CodeGen/NVPTX/calling-conv.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/compare-int.ll b/test/CodeGen/NVPTX/compare-int.ll index 12fc754821..16af0a336d 100644 --- a/test/CodeGen/NVPTX/compare-int.ll +++ b/test/CodeGen/NVPTX/compare-int.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/convert-fp.ll b/test/CodeGen/NVPTX/convert-fp.ll index 21c84379b0..1882121fa7 100644 --- a/test/CodeGen/NVPTX/convert-fp.ll +++ b/test/CodeGen/NVPTX/convert-fp.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/convert-int-sm10.ll b/test/CodeGen/NVPTX/convert-int-sm10.ll deleted file mode 100644 index 20716f982e..0000000000 --- a/test/CodeGen/NVPTX/convert-int-sm10.ll +++ /dev/null @@ -1,55 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - - -; i16 - -define i16 @cvt_i16_i32(i32 %x) { -; CHECK: cvt.u16.u32 %rs{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = trunc i32 %x to i16 - ret i16 %a -} - -define i16 @cvt_i16_i64(i64 %x) { -; CHECK: cvt.u16.u64 %rs{{[0-9]+}}, %rl{{[0-9]+}} -; CHECK: ret - %a = trunc i64 %x to i16 - ret i16 %a -} - - - -; i32 - -define i32 @cvt_i32_i16(i16 %x) { -; CHECK: cvt.u32.u16 %r{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: ret - %a = zext i16 %x to i32 - ret i32 %a -} - -define i32 @cvt_i32_i64(i64 %x) { -; CHECK: cvt.u32.u64 %r{{[0-9]+}}, %rl{{[0-9]+}} -; CHECK: ret - %a = trunc i64 %x to i32 - ret i32 %a -} - - - -; i64 - -define i64 @cvt_i64_i16(i16 %x) { -; CHECK: cvt.u64.u16 %rl{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: ret - %a = zext i16 %x to i64 - ret i64 %a -} - -define i64 @cvt_i64_i32(i32 %x) { -; CHECK: cvt.u64.u32 %rl{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: ret - %a = zext i32 %x to i64 - ret i64 %a -} diff --git a/test/CodeGen/NVPTX/intrinsic-old.ll b/test/CodeGen/NVPTX/intrinsic-old.ll index 1c9879c417..53a28f3337 100644 --- a/test/CodeGen/NVPTX/intrinsic-old.ll +++ b/test/CodeGen/NVPTX/intrinsic-old.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/intrinsics.ll b/test/CodeGen/NVPTX/intrinsics.ll index afab60ca96..8b0357be87 100644 --- a/test/CodeGen/NVPTX/intrinsics.ll +++ b/test/CodeGen/NVPTX/intrinsics.ll @@ -1,5 +1,3 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s diff --git a/test/CodeGen/NVPTX/ld-addrspace.ll b/test/CodeGen/NVPTX/ld-addrspace.ll index d1f5093df2..3265868d3c 100644 --- a/test/CodeGen/NVPTX/ld-addrspace.ll +++ b/test/CodeGen/NVPTX/ld-addrspace.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll new file mode 100644 index 0000000000..0d02194651 --- /dev/null +++ b/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0 +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1 + +@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00" + +declare i32 @__nvvm_reflect(i8*) +declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) + +define float @foo(float %a, float %b) { +; USE_MUL_0: define float @foo +; USE_MUL_0-NOT: call i32 @__nvvm_reflect +; USE_MUL_1: define float @foo +; USE_MUL_1-NOT: call i32 @__nvvm_reflect + %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0)) + %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + %cmp = icmp ugt i32 %reflect, 0 + br i1 %cmp, label %use_mul, label %use_add + +use_mul: +; USE_MUL_1: fmul float %a, %b +; USE_MUL_0-NOT: fadd float %a, %b + %ret1 = fmul float %a, %b + br label %exit + +use_add: +; USE_MUL_0: fadd float %a, %b +; USE_MUL_1-NOT: fmul float %a, %b + %ret2 = fadd float %a, %b + br label %exit + +exit: + %ret = phi float [%ret1, %use_mul], [%ret2, %use_add] + ret float %ret +} diff --git a/test/CodeGen/NVPTX/sched1.ll b/test/CodeGen/NVPTX/sched1.ll new file mode 100644 index 0000000000..03ab635e73 --- /dev/null +++ b/test/CodeGen/NVPTX/sched1.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +; Ensure source scheduling is working + +define void @foo(i32* %a) { +; CHECK: .func foo +; CHECK: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: ld.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr i32* %a, i32 0 + %val0 = load i32* %ptr0 + %ptr1 = getelementptr i32* %a, i32 1 + %val1 = load i32* %ptr1 + %ptr2 = getelementptr i32* %a, i32 2 + %val2 = load i32* %ptr2 + %ptr3 = getelementptr i32* %a, i32 3 + %val3 = load i32* %ptr3 + + %t0 = add i32 %val0, %val1 + %t1 = add i32 %t0, %val2 + %t2 = add i32 %t1, %val3 + + store i32 %t2, i32* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sched2.ll b/test/CodeGen/NVPTX/sched2.ll new file mode 100644 index 0000000000..71a9a4963f --- /dev/null +++ b/test/CodeGen/NVPTX/sched2.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + +define void @foo(<2 x i32>* %a) { +; CHECK: .func foo +; CHECK: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: ld.v2.u32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 +; CHECK-NEXT: add.s32 + %ptr0 = getelementptr <2 x i32>* %a, i32 0 + %val0 = load <2 x i32>* %ptr0 + %ptr1 = getelementptr <2 x i32>* %a, i32 1 + %val1 = load <2 x i32>* %ptr1 + %ptr2 = getelementptr <2 x i32>* %a, i32 2 + %val2 = load <2 x i32>* %ptr2 + %ptr3 = getelementptr <2 x i32>* %a, i32 3 + %val3 = load <2 x i32>* %ptr3 + + %t0 = add <2 x i32> %val0, %val1 + %t1 = add <2 x i32> %t0, %val2 + %t2 = add <2 x i32> %t1, %val3 + + store <2 x i32> %t2, <2 x i32>* %a + + ret void +} + diff --git a/test/CodeGen/NVPTX/sm-version-10.ll b/test/CodeGen/NVPTX/sm-version-10.ll deleted file mode 100644 index 9324a37809..0000000000 --- a/test/CodeGen/NVPTX/sm-version-10.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s - - -; CHECK: .target sm_10 - diff --git a/test/CodeGen/NVPTX/sm-version-11.ll b/test/CodeGen/NVPTX/sm-version-11.ll deleted file mode 100644 index 9033a4eba5..0000000000 --- a/test/CodeGen/NVPTX/sm-version-11.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_11 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_11 | FileCheck %s - - -; CHECK: .target sm_11 - diff --git a/test/CodeGen/NVPTX/sm-version-12.ll b/test/CodeGen/NVPTX/sm-version-12.ll deleted file mode 100644 index d8ee85c901..0000000000 --- a/test/CodeGen/NVPTX/sm-version-12.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_12 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_12 | FileCheck %s - - -; CHECK: .target sm_12 - diff --git a/test/CodeGen/NVPTX/sm-version-13.ll b/test/CodeGen/NVPTX/sm-version-13.ll deleted file mode 100644 index ad67d642ce..0000000000 --- a/test/CodeGen/NVPTX/sm-version-13.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_13 | FileCheck %s -; RUN: llc < %s -march=nvptx64 -mcpu=sm_13 | FileCheck %s - - -; CHECK: .target sm_13 - diff --git a/test/CodeGen/NVPTX/st-addrspace.ll b/test/CodeGen/NVPTX/st-addrspace.ll index 54e04ae610..0b26d802df 100644 --- a/test/CodeGen/NVPTX/st-addrspace.ll +++ b/test/CodeGen/NVPTX/st-addrspace.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_10 | FileCheck %s --check-prefix=PTX32 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s --check-prefix=PTX32 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_10 | FileCheck %s --check-prefix=PTX64 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=PTX64 diff --git a/test/CodeGen/NVPTX/tuple-literal.ll b/test/CodeGen/NVPTX/tuple-literal.ll index 5c0cb2c15c..2b1f2c4b66 100644 --- a/test/CodeGen/NVPTX/tuple-literal.ll +++ b/test/CodeGen/NVPTX/tuple-literal.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=nvptx -mcpu=sm_13 +; RUN: llc < %s -march=nvptx -mcpu=sm_20 define ptx_device void @test_function({i8, i8}*) { ret void diff --git a/test/CodeGen/NVPTX/vector-args.ll b/test/CodeGen/NVPTX/vector-args.ll new file mode 100644 index 0000000000..80deae4693 --- /dev/null +++ b/test/CodeGen/NVPTX/vector-args.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s + + +define float @foo(<2 x float> %a) { +; CHECK: .func (.param .b32 func_retval0) foo +; CHECK: .param .align 8 .b8 foo_param_0[8] +; CHECK: ld.param.f32 %f{{[0-9]+}} +; CHECK: ld.param.f32 %f{{[0-9]+}} + %t1 = fmul <2 x float> %a, %a + %t2 = extractelement <2 x float> %t1, i32 0 + %t3 = extractelement <2 x float> %t1, i32 1 + %t4 = fadd float %t2, %t3 + ret float %t4 +} + + +define float @bar(<4 x float> %a) { +; CHECK: .func (.param .b32 func_retval0) bar +; CHECK: .param .align 16 .b8 bar_param_0[16] +; CHECK: ld.param.f32 %f{{[0-9]+}} +; CHECK: ld.param.f32 %f{{[0-9]+}} + %t1 = fmul <4 x float> %a, %a + %t2 = extractelement <4 x float> %t1, i32 0 + %t3 = extractelement <4 x float> %t1, i32 1 + %t4 = fadd float %t2, %t3 + ret float %t4 +} diff --git a/test/CodeGen/NVPTX/vector-loads.ll b/test/CodeGen/NVPTX/vector-loads.ll index f5a1795e3c..58882bf166 100644 --- a/test/CodeGen/NVPTX/vector-loads.ll +++ b/test/CodeGen/NVPTX/vector-loads.ll @@ -9,7 +9,7 @@ define void @foo(<2 x float>* %a) { ; CHECK: .func foo -; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <2 x float>* %a %t2 = fmul <2 x float> %t1, %t1 store <2 x float> %t2, <2 x float>* %a @@ -18,7 +18,7 @@ define void @foo(<2 x float>* %a) { define void @foo2(<4 x float>* %a) { ; CHECK: .func foo2 -; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <4 x float>* %a %t2 = fmul <4 x float> %t1, %t1 store <4 x float> %t2, <4 x float>* %a @@ -27,8 +27,8 @@ define void @foo2(<4 x float>* %a) { define void @foo3(<8 x float>* %a) { ; CHECK: .func foo3 -; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}]; -; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}, [%r{{[0-9]+}}+16]; +; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} +; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}} %t1 = load <8 x float>* %a %t2 = fmul <8 x float> %t1, %t1 store <8 x float> %t2, <8 x float>* %a @@ -39,7 +39,7 @@ define void @foo3(<8 x float>* %a) { define void @foo4(<2 x i32>* %a) { ; CHECK: .func foo4 -; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <2 x i32>* %a %t2 = mul <2 x i32> %t1, %t1 store <2 x i32> %t2, <2 x i32>* %a @@ -48,7 +48,7 @@ define void @foo4(<2 x i32>* %a) { define void @foo5(<4 x i32>* %a) { ; CHECK: .func foo5 -; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <4 x i32>* %a %t2 = mul <4 x i32> %t1, %t1 store <4 x i32> %t2, <4 x i32>* %a @@ -57,8 +57,8 @@ define void @foo5(<4 x i32>* %a) { define void @foo6(<8 x i32>* %a) { ; CHECK: .func foo6 -; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}]; -; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}, [%r{{[0-9]+}}+16]; +; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} +; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}} %t1 = load <8 x i32>* %a %t2 = mul <8 x i32> %t1, %t1 store <8 x i32> %t2, <8 x i32>* %a diff --git a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll index 82ef2b82cb..b6feb5abbc 100644 --- a/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll +++ b/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc64 | grep dst | count 4 +; RUN: llc < %s -march=ppc64 -mattr=+altivec | grep dst | count 4 define hidden void @_Z4borkPc(i8* %image) { entry: diff --git a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll index 84aa40c4b5..91253daae3 100644 --- a/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll +++ b/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll @@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0" define void @foo(i32 %y) nounwind ssp { entry: ; CHECK: foo -; CHECK: add r3 -; CHECK: 0(r3) +; CHECK: add r2 +; CHECK: 0(r2) %y_addr = alloca i32 ; <i32*> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] store i32 %y, i32* %y_addr diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll index 974a99a52c..097611a761 100644 --- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll +++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll @@ -2,21 +2,21 @@ ; ModuleID = 'hh.c' target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" target triple = "powerpc-apple-darwin9.6" -; This formerly used R0 for both the stack address and CR. define void @foo() nounwind { entry: -;CHECK: mfcr r2 -;CHECK: lis r3, 1 -;CHECK: rlwinm r2, r2, 8, 0, 31 -;CHECK: ori r3, r3, 34524 -;CHECK: stwx r2, r1, r3 -; Make sure that the register scavenger returns the same temporary register. -;CHECK: mfcr r2 -;CHECK: lis r3, 1 -;CHECK: rlwinm r2, r2, 12, 0, 31 -;CHECK: ori r3, r3, 34520 -;CHECK: stwx r2, r1, r3 +; Note that part of what is being checked here is proper register reuse. +; CHECK: mfcr [[T1:r[0-9]+]] ; cr2 +; CHECK: lis [[T2:r[0-9]+]], 1 +; CHECK: addi r3, r1, 72 +; CHECK: rlwinm [[T1]], [[T1]], 8, 0, 31 +; CHECK: ori [[T2]], [[T2]], 34540 +; CHECK: stwx [[T1]], r1, [[T2]] +; CHECK: lis [[T3:r[0-9]+]], 1 +; CHECK: mfcr [[T4:r[0-9]+]] ; cr3 +; CHECK: ori [[T3]], [[T3]], 34536 +; CHECK: rlwinm [[T4]], [[T4]], 12, 0, 31 +; CHECK: stwx [[T4]], r1, [[T3]] %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] @@ -25,11 +25,16 @@ entry: br label %return return: ; preds = %entry -;CHECK: lis r3, 1 -;CHECK: ori r3, r3, 34524 -;CHECK: lwzx r2, r1, r3 -;CHECK: rlwinm r2, r2, 24, 0, 31 -;CHECK: mtcrf 32, r2 +; CHECK: lis [[T1:r[0-9]+]], 1 +; CHECK: ori [[T1]], [[T1]], 34536 +; CHECK: lwzx [[T1]], r1, [[T1]] +; CHECK: rlwinm [[T1]], [[T1]], 20, 0, 31 +; CHECK: mtcrf 16, [[T1]] +; CHECK: lis [[T1]], 1 +; CHECK: ori [[T1]], [[T1]], 34540 +; CHECK: lwzx [[T1]], r1, [[T1]] +; CHECK: rlwinm [[T1]], [[T1]], 24, 0, 31 +; CHECK: mtcrf 32, [[T1]] ret void } diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll index 72ae9d6c73..0dbc2d0180 100644 --- a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll +++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll @@ -18,8 +18,8 @@ entry: ; CHECK: _g: ; CHECK: mflr r0 ; CHECK: stw r0, 8(r1) -; CHECK: lwz r3, 0(r1) -; CHECK: lwz r3, 8(r3) +; CHECK: lwz r2, 0(r1) +; CHECK: lwz r3, 8(r2) %0 = tail call i8* @llvm.returnaddress(i32 1) ; <i8*> [#uses=1] ret i8* %0 } diff --git a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll index bf3d577a36..d1a3c9f46b 100644 --- a/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll +++ b/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll @@ -7,7 +7,7 @@ define i32 @main() nounwind { entry: ; Make sure we're generating references using the red zone ; CHECK: main: -; CHECK: stw r3, -12(r1) +; CHECK: stw r2, -12(r1) %retval = alloca i32 %0 = alloca i32 %"alloca point" = bitcast i32 0 to i32 diff --git a/test/CodeGen/PowerPC/DbgValueOtherTargets.test b/test/CodeGen/PowerPC/DbgValueOtherTargets.test index b1b338776b..9702934f7e 100644 --- a/test/CodeGen/PowerPC/DbgValueOtherTargets.test +++ b/test/CodeGen/PowerPC/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=ppc32 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll index 6f985c819f..e8765deab0 100644 --- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll +++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll @@ -1,9 +1,9 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \ -; RUN: grep "stw r4, 32751" +; RUN: grep "stw r3, 32751" ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep "stw r4, 32751" +; RUN: grep "stw r3, 32751" ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \ -; RUN: grep "std r4, 9024" +; RUN: grep "std r3, 9024" define void @test() nounwind { store i32 0, i32* inttoptr (i64 48725999 to i32*) diff --git a/test/CodeGen/PowerPC/Stats/lit.local.cfg b/test/CodeGen/PowerPC/Stats/lit.local.cfg deleted file mode 100644 index 2608e139e9..0000000000 --- a/test/CodeGen/PowerPC/Stats/lit.local.cfg +++ /dev/null @@ -1,8 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -targets = set(config.root.targets_to_build.split()) -if not 'PowerPC' in targets: - config.unsupported = True - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/CodeGen/PowerPC/allocate-r0.ll b/test/CodeGen/PowerPC/allocate-r0.ll new file mode 100644 index 0000000000..1cf4cec076 --- /dev/null +++ b/test/CodeGen/PowerPC/allocate-r0.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i64 @foo(i64 %a) nounwind { +entry: + call void asm sideeffect "", "~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() nounwind + br label %return + +; CHECK: @foo +; Because r0 is allocatable, we can use it to hold r3 without spilling. +; CHECK: mr 0, 3 +; CHECK: mr 3, 0 + +return: ; preds = %entry + ret i64 %a +} + diff --git a/test/CodeGen/PowerPC/asym-regclass-copy.ll b/test/CodeGen/PowerPC/asym-regclass-copy.ll new file mode 100644 index 0000000000..d04a6c98ee --- /dev/null +++ b/test/CodeGen/PowerPC/asym-regclass-copy.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; This tests that the GPRC/GPRC_NOR0 intersection subclass relationship with +; GPRC is handled correctly. When it was not, this test would assert. + +@gen_random.last = external unnamed_addr global i64, align 8 +@.str = external unnamed_addr constant [4 x i8], align 1 + +declare double @gen_random(double) #0 + +declare void @benchmark_heapsort(i32 signext, double* nocapture) #0 + +define signext i32 @main(i32 signext %argc, i8** nocapture %argv) #0 { +entry: + br i1 undef, label %cond.true, label %cond.end + +cond.true: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.true, %entry + %cond = phi i32 [ 0, %cond.true ], [ 8000000, %entry ] + %add = add i32 %cond, 1 + %conv = sext i32 %add to i64 + %mul = shl nsw i64 %conv, 3 + %call1 = tail call noalias i8* @malloc(i64 %mul) #1 + br i1 undef, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %cond.end + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + %indvars.iv = phi i64 [ 1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %add + br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %cond.end + ret i32 0 +} + +declare noalias i8* @malloc(i64) #0 + +declare signext i32 @printf(i8* nocapture, ...) #0 + +declare void @free(i8* nocapture) #0 + +declare i64 @strtol(i8*, i8** nocapture, i32 signext) #0 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll index cbfa4094fb..838db20ddd 100644 --- a/test/CodeGen/PowerPC/atomic-1.ll +++ b/test/CodeGen/PowerPC/atomic-1.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=ppc32 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 | FileCheck %s define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind { ; CHECK: exchange_and_add: -; CHECK: lwarx +; CHECK: lwarx {{r[0-9]+}}, 0, {{r[0-9]+}} %tmp = atomicrmw add i32* %mem, i32 %val monotonic -; CHECK: stwcx. +; CHECK: stwcx. {{r[0-9]+}}, 0, {{r[0-9]+}} ret i32 %tmp } diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll index 4f6bfc7299..53bbc52167 100644 --- a/test/CodeGen/PowerPC/bswap-load-store.ll +++ b/test/CodeGen/PowerPC/bswap-load-store.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -march=ppc32 -mcpu=ppc32 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=ppc64 -mcpu=ppc64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=PWR7 +; RUN: llc < %s -march=ppc32 -mcpu=pwr7 | FileCheck %s -check-prefix=X32 define void @STWBRX(i32 %i, i8* %ptr, i32 %off) { @@ -34,18 +36,47 @@ define i16 @LHBRX(i8* %ptr, i32 %off) { ret i16 %tmp6 } +define void @STDBRX(i64 %i, i8* %ptr, i64 %off) { + %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1] + %tmp1.upgrd.1 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1] + %tmp13 = tail call i64 @llvm.bswap.i64( i64 %i ) ; <i64> [#uses=1] + store i64 %tmp13, i64* %tmp1.upgrd.1 + ret void +} + +define i64 @LDBRX(i8* %ptr, i64 %off) { + %tmp1 = getelementptr i8* %ptr, i64 %off ; <i8*> [#uses=1] + %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64* ; <i64*> [#uses=1] + %tmp = load i64* %tmp1.upgrd.2 ; <i64> [#uses=1] + %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp ) ; <i64> [#uses=1] + ret i64 %tmp14 +} + declare i32 @llvm.bswap.i32(i32) declare i16 @llvm.bswap.i16(i16) +declare i64 @llvm.bswap.i64(i64) + ; X32: stwbrx ; X32: lwbrx ; X32: sthbrx ; X32: lhbrx +; X32-NOT: ldbrx +; X32-NOT: stdbrx ; X64: stwbrx ; X64: lwbrx ; X64: sthbrx ; X64: lhbrx +; X64-NOT: ldbrx +; X64-NOT: stdbrx + +; PWR7: stwbrx +; PWR7: lwbrx +; PWR7: sthbrx +; PWR7: lhbrx +; PWR7: stdbrx +; PWR7: ldbrx diff --git a/test/CodeGen/PowerPC/cr-spills.ll b/test/CodeGen/PowerPC/cr-spills.ll new file mode 100644 index 0000000000..d6df7a2376 --- /dev/null +++ b/test/CodeGen/PowerPC/cr-spills.ll @@ -0,0 +1,409 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; This test case triggers several functions related to cr spilling, both in +; frame lowering and to handle cr register pressure. When the register kill +; flags were not being set correctly, this would cause the register scavenger to +; assert. + +@SetupFastFullPelSearch.orig_pels = external unnamed_addr global [768 x i16], align 2 +@weight_luma = external global i32 +@offset_luma = external global i32 +@wp_luma_round = external global i32, align 4 +@luma_log_weight_denom = external global i32, align 4 + +define void @SetupFastFullPelSearch() #0 { +entry: + %mul10 = mul nsw i32 undef, undef + br i1 undef, label %land.end, label %land.lhs.true + +land.lhs.true: ; preds = %entry + switch i32 0, label %land.end [ + i32 0, label %land.rhs + i32 3, label %land.rhs + ] + +land.rhs: ; preds = %land.lhs.true, %land.lhs.true + %tobool21 = icmp ne i32 undef, 0 + br label %land.end + +land.end: ; preds = %land.rhs, %land.lhs.true, %entry + %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ] + %cond = load i32** undef, align 8 + br i1 undef, label %if.then95, label %for.body.lr.ph + +if.then95: ; preds = %land.end + %cmp.i4.i1427 = icmp slt i32 undef, undef + br label %for.body.lr.ph + +for.body.lr.ph: ; preds = %if.then95, %land.end + br label %for.body + +for.body: ; preds = %for.body, %for.body.lr.ph + br i1 undef, label %for.body, label %for.body252 + +for.body252: ; preds = %for.inc997, %for.body + %shl263 = add i32 undef, 80 + br i1 %0, label %for.cond286.preheader, label %for.cond713.preheader + +for.cond286.preheader: ; preds = %for.body252 + br label %for.cond290.preheader + +for.cond290.preheader: ; preds = %for.end520, %for.cond286.preheader + %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ] + %1 = load i32* undef, align 4, !tbaa !0 + %2 = load i32* @weight_luma, align 4, !tbaa !0 + %3 = load i32* @wp_luma_round, align 4, !tbaa !0 + %4 = load i32* @luma_log_weight_denom, align 4, !tbaa !0 + %5 = load i32* @offset_luma, align 4, !tbaa !0 + %incdec.ptr502.sum = add i64 undef, 16 + br label %for.body293 + +for.body293: ; preds = %for.body293, %for.cond290.preheader + %srcptr.41591 = phi i16* [ %srcptr.31595, %for.cond290.preheader ], [ undef, %for.body293 ] + %refptr.11590 = phi i16* [ undef, %for.cond290.preheader ], [ %add.ptr517, %for.body293 ] + %LineSadBlk0.01588 = phi i32 [ 0, %for.cond290.preheader ], [ %add346, %for.body293 ] + %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ] + %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ] + %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ] + %6 = load i16* %refptr.11590, align 2, !tbaa !3 + %conv294 = zext i16 %6 to i32 + %mul295 = mul nsw i32 %conv294, %2 + %add296 = add nsw i32 %mul295, %3 + %shr = ashr i32 %add296, %4 + %add297 = add nsw i32 %shr, %5 + %cmp.i.i1513 = icmp sgt i32 %add297, 0 + %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0 + %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1 + %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1 + %7 = load i16* %srcptr.41591, align 2, !tbaa !3 + %conv300 = zext i16 %7 to i32 + %sub301 = sub nsw i32 %cond.i5.i1516, %conv300 + %idxprom302 = sext i32 %sub301 to i64 + %arrayidx303 = getelementptr inbounds i32* %cond, i64 %idxprom302 + %8 = load i32* %arrayidx303, align 4, !tbaa !0 + %add304 = add nsw i32 %8, %LineSadBlk0.01588 + %9 = load i32* undef, align 4, !tbaa !0 + %add318 = add nsw i32 %add304, %9 + %10 = load i16* undef, align 2, !tbaa !3 + %conv321 = zext i16 %10 to i32 + %mul322 = mul nsw i32 %conv321, %2 + %add323 = add nsw i32 %mul322, %3 + %shr324 = ashr i32 %add323, %4 + %add325 = add nsw i32 %shr324, %5 + %cmp.i.i1505 = icmp sgt i32 %add325, 0 + %cond.i.i1506 = select i1 %cmp.i.i1505, i32 %add325, i32 0 + %cmp.i4.i1507 = icmp slt i32 %cond.i.i1506, %1 + %cond.i5.i1508 = select i1 %cmp.i4.i1507, i32 %cond.i.i1506, i32 %1 + %sub329 = sub nsw i32 %cond.i5.i1508, 0 + %idxprom330 = sext i32 %sub329 to i64 + %arrayidx331 = getelementptr inbounds i32* %cond, i64 %idxprom330 + %11 = load i32* %arrayidx331, align 4, !tbaa !0 + %add332 = add nsw i32 %add318, %11 + %cmp.i.i1501 = icmp sgt i32 undef, 0 + %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0 + %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1 + %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1 + %incdec.ptr341 = getelementptr inbounds i16* %srcptr.41591, i64 4 + %12 = load i16* null, align 2, !tbaa !3 + %conv342 = zext i16 %12 to i32 + %sub343 = sub nsw i32 %cond.i5.i1504, %conv342 + %idxprom344 = sext i32 %sub343 to i64 + %arrayidx345 = getelementptr inbounds i32* %cond, i64 %idxprom344 + %13 = load i32* %arrayidx345, align 4, !tbaa !0 + %add346 = add nsw i32 %add332, %13 + %incdec.ptr348 = getelementptr inbounds i16* %refptr.11590, i64 5 + %14 = load i16* null, align 2, !tbaa !3 + %conv349 = zext i16 %14 to i32 + %mul350 = mul nsw i32 %conv349, %2 + %add351 = add nsw i32 %mul350, %3 + %shr352 = ashr i32 %add351, %4 + %add353 = add nsw i32 %shr352, %5 + %cmp.i.i1497 = icmp sgt i32 %add353, 0 + %cond.i.i1498 = select i1 %cmp.i.i1497, i32 %add353, i32 0 + %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1 + %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1 + %incdec.ptr355 = getelementptr inbounds i16* %srcptr.41591, i64 5 + %15 = load i16* %incdec.ptr341, align 2, !tbaa !3 + %conv356 = zext i16 %15 to i32 + %sub357 = sub nsw i32 %cond.i5.i1500, %conv356 + %idxprom358 = sext i32 %sub357 to i64 + %arrayidx359 = getelementptr inbounds i32* %cond, i64 %idxprom358 + %16 = load i32* %arrayidx359, align 4, !tbaa !0 + %add360 = add nsw i32 %16, %LineSadBlk1.01587 + %incdec.ptr362 = getelementptr inbounds i16* %refptr.11590, i64 6 + %17 = load i16* %incdec.ptr348, align 2, !tbaa !3 + %conv363 = zext i16 %17 to i32 + %mul364 = mul nsw i32 %conv363, %2 + %add365 = add nsw i32 %mul364, %3 + %shr366 = ashr i32 %add365, %4 + %add367 = add nsw i32 %shr366, %5 + %cmp.i.i1493 = icmp sgt i32 %add367, 0 + %cond.i.i1494 = select i1 %cmp.i.i1493, i32 %add367, i32 0 + %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1 + %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1 + %incdec.ptr369 = getelementptr inbounds i16* %srcptr.41591, i64 6 + %18 = load i16* %incdec.ptr355, align 2, !tbaa !3 + %conv370 = zext i16 %18 to i32 + %sub371 = sub nsw i32 %cond.i5.i1496, %conv370 + %idxprom372 = sext i32 %sub371 to i64 + %arrayidx373 = getelementptr inbounds i32* %cond, i64 %idxprom372 + %19 = load i32* %arrayidx373, align 4, !tbaa !0 + %add374 = add nsw i32 %add360, %19 + %incdec.ptr376 = getelementptr inbounds i16* %refptr.11590, i64 7 + %20 = load i16* %incdec.ptr362, align 2, !tbaa !3 + %conv377 = zext i16 %20 to i32 + %mul378 = mul nsw i32 %conv377, %2 + %add379 = add nsw i32 %mul378, %3 + %shr380 = ashr i32 %add379, %4 + %add381 = add nsw i32 %shr380, %5 + %cmp.i.i1489 = icmp sgt i32 %add381, 0 + %cond.i.i1490 = select i1 %cmp.i.i1489, i32 %add381, i32 0 + %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1 + %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1 + %incdec.ptr383 = getelementptr inbounds i16* %srcptr.41591, i64 7 + %21 = load i16* %incdec.ptr369, align 2, !tbaa !3 + %conv384 = zext i16 %21 to i32 + %sub385 = sub nsw i32 %cond.i5.i1492, %conv384 + %idxprom386 = sext i32 %sub385 to i64 + %arrayidx387 = getelementptr inbounds i32* %cond, i64 %idxprom386 + %22 = load i32* %arrayidx387, align 4, !tbaa !0 + %add388 = add nsw i32 %add374, %22 + %23 = load i16* %incdec.ptr376, align 2, !tbaa !3 + %conv391 = zext i16 %23 to i32 + %mul392 = mul nsw i32 %conv391, %2 + %add395 = add nsw i32 0, %5 + %cmp.i.i1485 = icmp sgt i32 %add395, 0 + %cond.i.i1486 = select i1 %cmp.i.i1485, i32 %add395, i32 0 + %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1 + %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1 + %incdec.ptr397 = getelementptr inbounds i16* %srcptr.41591, i64 8 + %24 = load i16* %incdec.ptr383, align 2, !tbaa !3 + %conv398 = zext i16 %24 to i32 + %sub399 = sub nsw i32 %cond.i5.i1488, %conv398 + %idxprom400 = sext i32 %sub399 to i64 + %arrayidx401 = getelementptr inbounds i32* %cond, i64 %idxprom400 + %25 = load i32* %arrayidx401, align 4, !tbaa !0 + %add402 = add nsw i32 %add388, %25 + %incdec.ptr404 = getelementptr inbounds i16* %refptr.11590, i64 9 + %cmp.i4.i1483 = icmp slt i32 undef, %1 + %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1 + %26 = load i16* %incdec.ptr397, align 2, !tbaa !3 + %conv412 = zext i16 %26 to i32 + %sub413 = sub nsw i32 %cond.i5.i1484, %conv412 + %idxprom414 = sext i32 %sub413 to i64 + %arrayidx415 = getelementptr inbounds i32* %cond, i64 %idxprom414 + %27 = load i32* %arrayidx415, align 4, !tbaa !0 + %add416 = add nsw i32 %27, %LineSadBlk2.01585 + %incdec.ptr418 = getelementptr inbounds i16* %refptr.11590, i64 10 + %28 = load i16* %incdec.ptr404, align 2, !tbaa !3 + %conv419 = zext i16 %28 to i32 + %mul420 = mul nsw i32 %conv419, %2 + %add421 = add nsw i32 %mul420, %3 + %shr422 = ashr i32 %add421, %4 + %add423 = add nsw i32 %shr422, %5 + %cmp.i.i1477 = icmp sgt i32 %add423, 0 + %cond.i.i1478 = select i1 %cmp.i.i1477, i32 %add423, i32 0 + %cmp.i4.i1479 = icmp slt i32 %cond.i.i1478, %1 + %cond.i5.i1480 = select i1 %cmp.i4.i1479, i32 %cond.i.i1478, i32 %1 + %incdec.ptr425 = getelementptr inbounds i16* %srcptr.41591, i64 10 + %sub427 = sub nsw i32 %cond.i5.i1480, 0 + %idxprom428 = sext i32 %sub427 to i64 + %arrayidx429 = getelementptr inbounds i32* %cond, i64 %idxprom428 + %29 = load i32* %arrayidx429, align 4, !tbaa !0 + %add430 = add nsw i32 %add416, %29 + %incdec.ptr432 = getelementptr inbounds i16* %refptr.11590, i64 11 + %30 = load i16* %incdec.ptr418, align 2, !tbaa !3 + %conv433 = zext i16 %30 to i32 + %mul434 = mul nsw i32 %conv433, %2 + %add435 = add nsw i32 %mul434, %3 + %shr436 = ashr i32 %add435, %4 + %add437 = add nsw i32 %shr436, %5 + %cmp.i.i1473 = icmp sgt i32 %add437, 0 + %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0 + %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1 + %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1 + %31 = load i16* %incdec.ptr425, align 2, !tbaa !3 + %conv440 = zext i16 %31 to i32 + %sub441 = sub nsw i32 %cond.i5.i1476, %conv440 + %idxprom442 = sext i32 %sub441 to i64 + %arrayidx443 = getelementptr inbounds i32* %cond, i64 %idxprom442 + %32 = load i32* %arrayidx443, align 4, !tbaa !0 + %add444 = add nsw i32 %add430, %32 + %incdec.ptr446 = getelementptr inbounds i16* %refptr.11590, i64 12 + %33 = load i16* %incdec.ptr432, align 2, !tbaa !3 + %conv447 = zext i16 %33 to i32 + %mul448 = mul nsw i32 %conv447, %2 + %add449 = add nsw i32 %mul448, %3 + %shr450 = ashr i32 %add449, %4 + %add451 = add nsw i32 %shr450, %5 + %cmp.i.i1469 = icmp sgt i32 %add451, 0 + %cond.i.i1470 = select i1 %cmp.i.i1469, i32 %add451, i32 0 + %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1 + %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1 + %incdec.ptr453 = getelementptr inbounds i16* %srcptr.41591, i64 12 + %34 = load i16* undef, align 2, !tbaa !3 + %conv454 = zext i16 %34 to i32 + %sub455 = sub nsw i32 %cond.i5.i1472, %conv454 + %idxprom456 = sext i32 %sub455 to i64 + %arrayidx457 = getelementptr inbounds i32* %cond, i64 %idxprom456 + %35 = load i32* %arrayidx457, align 4, !tbaa !0 + %add458 = add nsw i32 %add444, %35 + %incdec.ptr460 = getelementptr inbounds i16* %refptr.11590, i64 13 + %36 = load i16* %incdec.ptr446, align 2, !tbaa !3 + %conv461 = zext i16 %36 to i32 + %mul462 = mul nsw i32 %conv461, %2 + %add463 = add nsw i32 %mul462, %3 + %shr464 = ashr i32 %add463, %4 + %add465 = add nsw i32 %shr464, %5 + %cmp.i.i1465 = icmp sgt i32 %add465, 0 + %cond.i.i1466 = select i1 %cmp.i.i1465, i32 %add465, i32 0 + %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1 + %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1 + %incdec.ptr467 = getelementptr inbounds i16* %srcptr.41591, i64 13 + %37 = load i16* %incdec.ptr453, align 2, !tbaa !3 + %conv468 = zext i16 %37 to i32 + %sub469 = sub nsw i32 %cond.i5.i1468, %conv468 + %idxprom470 = sext i32 %sub469 to i64 + %arrayidx471 = getelementptr inbounds i32* %cond, i64 %idxprom470 + %38 = load i32* %arrayidx471, align 4, !tbaa !0 + %add472 = add nsw i32 %38, %LineSadBlk3.01586 + %incdec.ptr474 = getelementptr inbounds i16* %refptr.11590, i64 14 + %add477 = add nsw i32 0, %3 + %shr478 = ashr i32 %add477, %4 + %add479 = add nsw i32 %shr478, %5 + %cmp.i.i1461 = icmp sgt i32 %add479, 0 + %cond.i.i1462 = select i1 %cmp.i.i1461, i32 %add479, i32 0 + %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1 + %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1 + %incdec.ptr481 = getelementptr inbounds i16* %srcptr.41591, i64 14 + %39 = load i16* %incdec.ptr467, align 2, !tbaa !3 + %conv482 = zext i16 %39 to i32 + %sub483 = sub nsw i32 %cond.i5.i1464, %conv482 + %idxprom484 = sext i32 %sub483 to i64 + %arrayidx485 = getelementptr inbounds i32* %cond, i64 %idxprom484 + %40 = load i32* %arrayidx485, align 4, !tbaa !0 + %add486 = add nsw i32 %add472, %40 + %incdec.ptr488 = getelementptr inbounds i16* %refptr.11590, i64 15 + %41 = load i16* %incdec.ptr474, align 2, !tbaa !3 + %conv489 = zext i16 %41 to i32 + %mul490 = mul nsw i32 %conv489, %2 + %add491 = add nsw i32 %mul490, %3 + %shr492 = ashr i32 %add491, %4 + %add493 = add nsw i32 %shr492, %5 + %cmp.i.i1457 = icmp sgt i32 %add493, 0 + %cond.i.i1458 = select i1 %cmp.i.i1457, i32 %add493, i32 0 + %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1 + %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1 + %incdec.ptr495 = getelementptr inbounds i16* %srcptr.41591, i64 15 + %42 = load i16* %incdec.ptr481, align 2, !tbaa !3 + %conv496 = zext i16 %42 to i32 + %sub497 = sub nsw i32 %cond.i5.i1460, %conv496 + %idxprom498 = sext i32 %sub497 to i64 + %arrayidx499 = getelementptr inbounds i32* %cond, i64 %idxprom498 + %43 = load i32* %arrayidx499, align 4, !tbaa !0 + %add500 = add nsw i32 %add486, %43 + %44 = load i16* %incdec.ptr488, align 2, !tbaa !3 + %conv503 = zext i16 %44 to i32 + %mul504 = mul nsw i32 %conv503, %2 + %add505 = add nsw i32 %mul504, %3 + %shr506 = ashr i32 %add505, %4 + %add507 = add nsw i32 %shr506, %5 + %cmp.i.i1453 = icmp sgt i32 %add507, 0 + %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0 + %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1 + %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1 + %45 = load i16* %incdec.ptr495, align 2, !tbaa !3 + %conv510 = zext i16 %45 to i32 + %sub511 = sub nsw i32 %cond.i5.i1456, %conv510 + %idxprom512 = sext i32 %sub511 to i64 + %arrayidx513 = getelementptr inbounds i32* %cond, i64 %idxprom512 + %46 = load i32* %arrayidx513, align 4, !tbaa !0 + %add514 = add nsw i32 %add500, %46 + %add.ptr517 = getelementptr inbounds i16* %refptr.11590, i64 %incdec.ptr502.sum + %exitcond1692 = icmp eq i32 undef, 4 + br i1 %exitcond1692, label %for.end520, label %for.body293 + +for.end520: ; preds = %for.body293 + store i32 %add346, i32* undef, align 4, !tbaa !0 + store i32 %add402, i32* undef, align 4, !tbaa !0 + store i32 %add458, i32* undef, align 4, !tbaa !0 + store i32 %add514, i32* null, align 4, !tbaa !0 + br i1 undef, label %for.end543, label %for.cond290.preheader + +for.end543: ; preds = %for.end520 + br i1 undef, label %for.inc997, label %for.body549 + +for.body549: ; preds = %for.inc701, %for.end543 + %call554 = call i16* null(i16**** null, i32 signext undef, i32 signext %shl263) #1 + br label %for.cond559.preheader + +for.cond559.preheader: ; preds = %for.cond559.preheader, %for.body549 + br i1 undef, label %for.inc701, label %for.cond559.preheader + +for.inc701: ; preds = %for.cond559.preheader + br i1 undef, label %for.inc997, label %for.body549 + +for.cond713.preheader: ; preds = %for.end850, %for.body252 + br label %for.body716 + +for.body716: ; preds = %for.body716, %for.cond713.preheader + br i1 undef, label %for.end850, label %for.body716 + +for.end850: ; preds = %for.body716 + br i1 undef, label %for.end873, label %for.cond713.preheader + +for.end873: ; preds = %for.end850 + br i1 undef, label %for.inc997, label %for.body879 + +for.body879: ; preds = %for.inc992, %for.end873 + br label %for.cond889.preheader + +for.cond889.preheader: ; preds = %for.end964, %for.body879 + br i1 undef, label %for.cond894.preheader.lr.ph, label %for.end964 + +for.cond894.preheader.lr.ph: ; preds = %for.cond889.preheader + br label %for.body898.lr.ph.us + +for.end957.us: ; preds = %for.body946.us + br i1 undef, label %for.body898.lr.ph.us, label %for.end964 + +for.body946.us: ; preds = %for.body930.us, %for.body946.us + br i1 false, label %for.body946.us, label %for.end957.us + +for.body930.us: ; preds = %for.body914.us, %for.body930.us + br i1 undef, label %for.body930.us, label %for.body946.us + +for.body914.us: ; preds = %for.body898.us, %for.body914.us + br i1 undef, label %for.body914.us, label %for.body930.us + +for.body898.us: ; preds = %for.body898.lr.ph.us, %for.body898.us + br i1 undef, label %for.body898.us, label %for.body914.us + +for.body898.lr.ph.us: ; preds = %for.end957.us, %for.cond894.preheader.lr.ph + br label %for.body898.us + +for.end964: ; preds = %for.end957.us, %for.cond889.preheader + %inc990 = add nsw i32 undef, 1 + br i1 false, label %for.inc992, label %for.cond889.preheader + +for.inc992: ; preds = %for.end964 + br i1 false, label %for.inc997, label %for.body879 + +for.inc997: ; preds = %for.inc992, %for.end873, %for.inc701, %for.end543 + %cmp250 = icmp slt i32 undef, %mul10 + br i1 %cmp250, label %for.body252, label %for.end999 + +for.end999: ; preds = %for.inc997 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"short", metadata !1} diff --git a/test/CodeGen/PowerPC/ctr-cleanup.ll b/test/CodeGen/PowerPC/ctr-cleanup.ll new file mode 100644 index 0000000000..04e4ffb0d4 --- /dev/null +++ b/test/CodeGen/PowerPC/ctr-cleanup.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -mcpu=a2 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @main() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 5 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: @main +; CHECK: li {{[0-9]+}}, 4 +; CHECK-NOT: li {{[0-9]+}}, 4 +; CHECK: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll index 1d365d47a8..3757fa3e2f 100644 --- a/test/CodeGen/PowerPC/cttz.ll +++ b/test/CodeGen/PowerPC/cttz.ll @@ -1,10 +1,12 @@ ; Make sure this testcase does not use ctpop -; RUN: llc < %s -march=ppc32 | grep -i cntlzw +; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s declare i32 @llvm.cttz.i32(i32, i1) define i32 @bar(i32 %x) { entry: +; CHECK: @bar +; CHECK: cntlzw %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1] ret i32 %tmp.1 } diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll index 8d87cf793d..21e36618c5 100644 --- a/test/CodeGen/PowerPC/dbg.ll +++ b/test/CodeGen/PowerPC/dbg.ll @@ -16,10 +16,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !9, metadata !10} diff --git a/test/CodeGen/PowerPC/float-to-int.ll b/test/CodeGen/PowerPC/float-to-int.ll new file mode 100644 index 0000000000..39cd4f929f --- /dev/null +++ b/test/CodeGen/PowerPC/float-to-int.ll @@ -0,0 +1,93 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i64 @foo(float %a) nounwind { + %x = fptosi float %a to i64 + ret i64 %x + +; CHECK: @foo +; CHECK: fctidz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo2(double %a) nounwind { + %x = fptosi double %a to i64 + ret i64 %x + +; CHECK: @foo2 +; CHECK: fctidz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo3(float %a) nounwind { + %x = fptoui float %a to i64 + ret i64 %x + +; CHECK: @foo3 +; CHECK: fctiduz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i64 @foo4(double %a) nounwind { + %x = fptoui double %a to i64 + ret i64 %x + +; CHECK: @foo4 +; CHECK: fctiduz [[REG:[0-9]+]], 1 +; CHECK: stfd [[REG]], +; CHECK: ld 3, +; CHECK: blr +} + +define i32 @goo(float %a) nounwind { + %x = fptosi float %a to i32 + ret i32 %x + +; CHECK: @goo +; CHECK: fctiwz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo2(double %a) nounwind { + %x = fptosi double %a to i32 + ret i32 %x + +; CHECK: @goo2 +; CHECK: fctiwz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo3(float %a) nounwind { + %x = fptoui float %a to i32 + ret i32 %x + +; CHECK: @goo3 +; CHECK: fctiwuz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + +define i32 @goo4(double %a) nounwind { + %x = fptoui double %a to i32 + ret i32 %x + +; CHECK: @goo4 +; CHECK: fctiwuz [[REG:[0-9]+]], 1 +; CHECK: stfiwx [[REG]], +; CHECK: lwz 3, +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/frame-size.ll b/test/CodeGen/PowerPC/frame-size.ll new file mode 100644 index 0000000000..0e569a4602 --- /dev/null +++ b/test/CodeGen/PowerPC/frame-size.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define i64 @foo() nounwind { +entry: + %x = alloca [32568 x i8] + %"alloca point" = bitcast i32 0 to i32 + %x1 = bitcast [32568 x i8]* %x to i8* + +; Check that the RS spill slot has been allocated (because the estimate +; will fail the small-frame-size check and the function has spills). +; CHECK: @foo +; CHECK: stdu 1, -32768(1) + + %s1 = call i64 @bar(i8* %x1) nounwind + %s2 = call i64 @bar(i8* %x1) nounwind + %s3 = call i64 @bar(i8* %x1) nounwind + %s4 = call i64 @bar(i8* %x1) nounwind + %s5 = call i64 @bar(i8* %x1) nounwind + %s6 = call i64 @bar(i8* %x1) nounwind + %s7 = call i64 @bar(i8* %x1) nounwind + %s8 = call i64 @bar(i8* %x1) nounwind + %r = call i64 @can(i64 %s1, i64 %s2, i64 %s3, i64 %s4, i64 %s5, i64 %s6, i64 %s7, i64 %s8) nounwind + br label %return + +return: + ret i64 %r +} + +declare i64 @bar(i8*) +declare i64 @can(i64, i64, i64, i64, i64, i64, i64, i64) + diff --git a/test/CodeGen/PowerPC/frameaddr.ll b/test/CodeGen/PowerPC/frameaddr.ll new file mode 100644 index 0000000000..eabd4a68aa --- /dev/null +++ b/test/CodeGen/PowerPC/frameaddr.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +define i8* @main() #0 { +entry: + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; CHECK: @main +; CHECK: mr 3, 1 +} + +define i8* @foo() #3 { ; naked +entry: + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; CHECK: @foo +; CHECK: mr 3, 1 +} + +define i8* @bar() #0 { +entry: + %x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1] + %x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1] + call void @use(i8* %x1) nounwind + %0 = call i8* @llvm.frameaddress(i32 0) + ret i8* %0 + +; Note that if we start eliminating non-leaf frame pointers by default, this +; will need to be updated. +; CHECK: @bar +; CHECK: mr 3, 31 +} + +declare void @use(i8*) + +declare i8* @llvm.frameaddress(i32) #2 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind naked "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/test/CodeGen/PowerPC/i32-to-float.ll b/test/CodeGen/PowerPC/i32-to-float.ll new file mode 100644 index 0000000000..2707d0352d --- /dev/null +++ b/test/CodeGen/PowerPC/i32-to-float.ll @@ -0,0 +1,82 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr6 | FileCheck -check-prefix=CHECK-PWR6 %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-A2 %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @foo(i32 %a) nounwind { +entry: + %x = sitofp i32 %a to float + ret float %x + +; CHECK: @foo +; CHECK: extsw [[REG:[0-9]+]], 3 +; CHECK: std [[REG]], +; CHECK: lfd [[REG2:[0-9]+]], +; CHECK: fcfid [[REG3:[0-9]+]], [[REG2]] +; CHECK: frsp 1, [[REG3]] +; CHECK: blr + +; CHECK-PWR6: @foo +; CHECK-PWR6: stw 3, +; CHECK-PWR6: lfiwax [[REG:[0-9]+]], +; CHECK-PWR6: fcfid [[REG2:[0-9]+]], [[REG]] +; CHECK-PWR6: frsp 1, [[REG2]] +; CHECK-PWR6: blr + +; CHECK-A2: @foo +; CHECK-A2: stw 3, +; CHECK-A2: lfiwax [[REG:[0-9]+]], +; CHECK-A2: fcfids 1, [[REG]] +; CHECK-A2: blr +} + +define double @goo(i32 %a) nounwind { +entry: + %x = sitofp i32 %a to double + ret double %x + +; CHECK: @goo +; CHECK: extsw [[REG:[0-9]+]], 3 +; CHECK: std [[REG]], +; CHECK: lfd [[REG2:[0-9]+]], +; CHECK: fcfid 1, [[REG2]] +; CHECK: blr + +; CHECK-PWR6: @goo +; CHECK-PWR6: stw 3, +; CHECK-PWR6: lfiwax [[REG:[0-9]+]], +; CHECK-PWR6: fcfid 1, [[REG]] +; CHECK-PWR6: blr + +; CHECK-A2: @goo +; CHECK-A2: stw 3, +; CHECK-A2: lfiwax [[REG:[0-9]+]], +; CHECK-A2: fcfid 1, [[REG]] +; CHECK-A2: blr +} + +define float @foou(i32 %a) nounwind { +entry: + %x = uitofp i32 %a to float + ret float %x + +; CHECK-A2: @foou +; CHECK-A2: stw 3, +; CHECK-A2: lfiwzx [[REG:[0-9]+]], +; CHECK-A2: fcfidus 1, [[REG]] +; CHECK-A2: blr +} + +define double @goou(i32 %a) nounwind { +entry: + %x = uitofp i32 %a to double + ret double %x + +; CHECK-A2: @goou +; CHECK-A2: stw 3, +; CHECK-A2: lfiwzx [[REG:[0-9]+]], +; CHECK-A2: fcfidu 1, [[REG]] +; CHECK-A2: blr +} + diff --git a/test/CodeGen/PowerPC/i64-to-float.ll b/test/CodeGen/PowerPC/i64-to-float.ll new file mode 100644 index 0000000000..b81d109e7f --- /dev/null +++ b/test/CodeGen/PowerPC/i64-to-float.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @foo(i64 %a) nounwind { +entry: + %x = sitofp i64 %a to float + ret float %x + +; CHECK: @foo +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfids 1, [[REG]] +; CHECK: blr +} + +define double @goo(i64 %a) nounwind { +entry: + %x = sitofp i64 %a to double + ret double %x + +; CHECK: @goo +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfid 1, [[REG]] +; CHECK: blr +} + +define float @foou(i64 %a) nounwind { +entry: + %x = uitofp i64 %a to float + ret float %x + +; CHECK: @foou +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfidus 1, [[REG]] +; CHECK: blr +} + +define double @goou(i64 %a) nounwind { +entry: + %x = uitofp i64 %a to double + ret double %x + +; CHECK: @goou +; CHECK: std 3, +; CHECK: lfd [[REG:[0-9]+]], +; CHECK: fcfidu 1, [[REG]] +; CHECK: blr +} + diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll index 5ae1be8953..d2a3239ab8 100644 --- a/test/CodeGen/PowerPC/i64_fp_round.ll +++ b/test/CodeGen/PowerPC/i64_fp_round.ll @@ -1,4 +1,4 @@ -; RUN: llc -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -22,6 +22,6 @@ entry: ; Also check that with -enable-unsafe-fp-math we do not get that extra ; code sequence. Simply verify that there is no "isel" present. -; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE +; RUN: llc -mcpu=pwr7 -mattr=-fpcvt -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE ; CHECK-UNSAFE-NOT: isel diff --git a/test/CodeGen/PowerPC/Stats/iabs.ll b/test/CodeGen/PowerPC/iabs.ll index 7d089bbd65..f683238de2 100644 --- a/test/CodeGen/PowerPC/Stats/iabs.ll +++ b/test/CodeGen/PowerPC/iabs.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=ppc32 -stats 2>&1 | \ ; RUN: grep "4 .*Number of machine instrs printed" diff --git a/test/CodeGen/PowerPC/jaggedstructs.ll b/test/CodeGen/PowerPC/jaggedstructs.ll index 62aa7cf929..a10c5ddb36 100644 --- a/test/CodeGen/PowerPC/jaggedstructs.ll +++ b/test/CodeGen/PowerPC/jaggedstructs.ll @@ -23,22 +23,22 @@ entry: ; CHECK: std 4, 200(1) ; CHECK: std 3, 192(1) ; CHECK: lbz {{[0-9]+}}, 199(1) -; CHECK: stb {{[0-9]+}}, 55(1) ; CHECK: lhz {{[0-9]+}}, 197(1) +; CHECK: stb {{[0-9]+}}, 55(1) ; CHECK: sth {{[0-9]+}}, 53(1) ; CHECK: lbz {{[0-9]+}}, 207(1) -; CHECK: stb {{[0-9]+}}, 63(1) ; CHECK: lwz {{[0-9]+}}, 203(1) +; CHECK: stb {{[0-9]+}}, 63(1) ; CHECK: stw {{[0-9]+}}, 59(1) ; CHECK: lhz {{[0-9]+}}, 214(1) -; CHECK: sth {{[0-9]+}}, 70(1) ; CHECK: lwz {{[0-9]+}}, 210(1) +; CHECK: sth {{[0-9]+}}, 70(1) ; CHECK: stw {{[0-9]+}}, 66(1) ; CHECK: lbz {{[0-9]+}}, 223(1) -; CHECK: stb {{[0-9]+}}, 79(1) ; CHECK: lhz {{[0-9]+}}, 221(1) -; CHECK: sth {{[0-9]+}}, 77(1) ; CHECK: lwz {{[0-9]+}}, 217(1) +; CHECK: stb {{[0-9]+}}, 79(1) +; CHECK: sth {{[0-9]+}}, 77(1) ; CHECK: stw {{[0-9]+}}, 73(1) ; CHECK: ld 6, 72(1) ; CHECK: ld 5, 64(1) diff --git a/test/CodeGen/PowerPC/lbzux.ll b/test/CodeGen/PowerPC/lbzux.ll index 12f1d1f130..98951306fd 100644 --- a/test/CodeGen/PowerPC/lbzux.ll +++ b/test/CodeGen/PowerPC/lbzux.ll @@ -1,6 +1,6 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" -; RUN: llc < %s | FileCheck %s +; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind { entry: diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg index 4019eca0bb..aaa31d93d5 100644 --- a/test/CodeGen/PowerPC/lit.local.cfg +++ b/test/CodeGen/PowerPC/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'PowerPC' in targets: diff --git a/test/CodeGen/PowerPC/negctr.ll b/test/CodeGen/PowerPC/negctr.ll new file mode 100644 index 0000000000..2f6995c65d --- /dev/null +++ b/test/CodeGen/PowerPC/negctr.ll @@ -0,0 +1,83 @@ +; RUN: llc < %s -mcpu=a2 | FileCheck %s +; RUN: llc < %s -mcpu=a2 -disable-lsr | FileCheck -check-prefix=NOLSR %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define void @main() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 0 + br i1 %exitcond, label %for.end, label %for.body + +; FIXME: We currently can't form the 32-bit unsigned trip count necessary here! +; CHECK: @main +; CHECK-NOT: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @main1() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 0 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: @main1 +; CHECK: li [[REG:[0-9]+]], -1 +; CHECK: mtctr [[REG]] +; CHECK: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @main2() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, -100000 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: @main2 +; CHECK: lis [[REG:[0-9]+]], -2 +; CHECK: ori [[REG2:[0-9]+]], [[REG]], 31071 +; CHECK: mtctr [[REG2]] +; CHECK: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +define void @main3() #0 { +entry: + br i1 undef, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 127984, %entry ] + %indvars.iv.next = add i64 %indvars.iv, -16 + %exitcond = icmp eq i64 %indvars.iv.next, -16 + br i1 %exitcond, label %for.end, label %for.body + +; NOLSR: @main3 +; NOLSR: li [[REG:[0-9]+]], 8000 +; NOLSR: mtctr [[REG]] +; NOLSR: bdnz + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/popcnt.ll b/test/CodeGen/PowerPC/popcnt.ll new file mode 100644 index 0000000000..b304d72aed --- /dev/null +++ b/test/CodeGen/PowerPC/popcnt.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=ppc64 -mattr=+popcntd < %s | FileCheck %s + +define i8 @cnt8(i8 %x) nounwind readnone { + %cnt = tail call i8 @llvm.ctpop.i8(i8 %x) + ret i8 %cnt +; CHECK: @cnt8 +; CHECK: rlwinm +; CHECK: popcntw +; CHECK: blr +} + +define i16 @cnt16(i16 %x) nounwind readnone { + %cnt = tail call i16 @llvm.ctpop.i16(i16 %x) + ret i16 %cnt +; CHECK: @cnt16 +; CHECK: rlwinm +; CHECK: popcntw +; CHECK: blr +} + +define i32 @cnt32(i32 %x) nounwind readnone { + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +; CHECK: @cnt32 +; CHECK: popcntw +; CHECK: blr +} + +define i64 @cnt64(i64 %x) nounwind readnone { + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +; CHECK: @cnt64 +; CHECK: popcntd +; CHECK: blr +} + +declare i8 @llvm.ctpop.i8(i8) nounwind readnone +declare i16 @llvm.ctpop.i16(i16) nounwind readnone +declare i32 @llvm.ctpop.i32(i32) nounwind readnone +declare i64 @llvm.ctpop.i64(i64) nounwind readnone diff --git a/test/CodeGen/PowerPC/r31.ll b/test/CodeGen/PowerPC/r31.ll new file mode 100644 index 0000000000..7ce12f600b --- /dev/null +++ b/test/CodeGen/PowerPC/r31.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define i64 @foo(i64 %a) nounwind { +entry: + call void asm sideeffect "", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30}"() nounwind + br label %return + +; CHECK: @foo +; CHECK: mr 31, 3 + +return: ; preds = %entry + ret i64 %a +} + diff --git a/test/CodeGen/PowerPC/Stats/rlwimi3.ll b/test/CodeGen/PowerPC/rlwimi3.ll index 7efdbe9634..31b6d4aa03 100644 --- a/test/CodeGen/PowerPC/Stats/rlwimi3.ll +++ b/test/CodeGen/PowerPC/rlwimi3.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=ppc32 -stats 2>&1 | \ ; RUN: grep "Number of machine instrs printed" | grep 12 diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll new file mode 100644 index 0000000000..b210a6bda8 --- /dev/null +++ b/test/CodeGen/PowerPC/rounding-ops.ll @@ -0,0 +1,145 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define float @test1(float %x) nounwind { + %call = tail call float @floorf(float %x) nounwind readnone + ret float %call + +; CHECK: test1: +; CHECK: frim 1, 1 + +; CHECK-FM: test1: +; CHECK-FM: frim 1, 1 +} + +declare float @floorf(float) nounwind readnone + +define double @test2(double %x) nounwind { + %call = tail call double @floor(double %x) nounwind readnone + ret double %call + +; CHECK: test2: +; CHECK: frim 1, 1 + +; CHECK-FM: test2: +; CHECK-FM: frim 1, 1 +} + +declare double @floor(double) nounwind readnone + +define float @test3(float %x) nounwind { + %call = tail call float @nearbyintf(float %x) nounwind readnone + ret float %call + +; CHECK: test3: +; CHECK-NOT: frin + +; CHECK-FM: test3: +; CHECK-FM: frin 1, 1 +} + +declare float @nearbyintf(float) nounwind readnone + +define double @test4(double %x) nounwind { + %call = tail call double @nearbyint(double %x) nounwind readnone + ret double %call + +; CHECK: test4: +; CHECK-NOT: frin + +; CHECK-FM: test4: +; CHECK-FM: frin 1, 1 +} + +declare double @nearbyint(double) nounwind readnone + +define float @test5(float %x) nounwind { + %call = tail call float @ceilf(float %x) nounwind readnone + ret float %call + +; CHECK: test5: +; CHECK: frip 1, 1 + +; CHECK-FM: test5: +; CHECK-FM: frip 1, 1 +} + +declare float @ceilf(float) nounwind readnone + +define double @test6(double %x) nounwind { + %call = tail call double @ceil(double %x) nounwind readnone + ret double %call + +; CHECK: test6: +; CHECK: frip 1, 1 + +; CHECK-FM: test6: +; CHECK-FM: frip 1, 1 +} + +declare double @ceil(double) nounwind readnone + +define float @test9(float %x) nounwind { + %call = tail call float @truncf(float %x) nounwind readnone + ret float %call + +; CHECK: test9: +; CHECK: friz 1, 1 + +; CHECK-FM: test9: +; CHECK-FM: friz 1, 1 +} + +declare float @truncf(float) nounwind readnone + +define double @test10(double %x) nounwind { + %call = tail call double @trunc(double %x) nounwind readnone + ret double %call + +; CHECK: test10: +; CHECK: friz 1, 1 + +; CHECK-FM: test10: +; CHECK-FM: friz 1, 1 +} + +declare double @trunc(double) nounwind readnone + +define float @test11(float %x) nounwind { + %call = tail call float @rintf(float %x) nounwind readnone + ret float %call + +; CHECK: test11: +; CHECK-NOT: frin + +; CHECK-FM: test11: +; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]] +; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]] +; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2 +; CHECK-FM: mtfsb1 6 +; CHECK-FM: .LBB[[BB]]_2: +; CHECK-FM: blr +} + +declare float @rintf(float) nounwind readnone + +define double @test12(double %x) nounwind { + %call = tail call double @rint(double %x) nounwind readnone + ret double %call + +; CHECK: test12: +; CHECK-NOT: frin + +; CHECK-FM: test12: +; CHECK-FM: frin [[R2:[0-9]+]], [[R1:[0-9]+]] +; CHECK-FM: fcmpu [[CR:[0-9]+]], [[R2]], [[R1]] +; CHECK-FM: beq [[CR]], .LBB[[BB:[0-9]+]]_2 +; CHECK-FM: mtfsb1 6 +; CHECK-FM: .LBB[[BB]]_2: +; CHECK-FM: blr +} + +declare double @rint(double) nounwind readnone + diff --git a/test/CodeGen/PowerPC/sjlj.ll b/test/CodeGen/PowerPC/sjlj.ll new file mode 100644 index 0000000000..7ea35dafc3 --- /dev/null +++ b/test/CodeGen/PowerPC/sjlj.ll @@ -0,0 +1,112 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck -check-prefix=CHECK-NOAV %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.__jmp_buf_tag = type { [64 x i64], i32, %struct.__sigset_t, [8 x i8] } +%struct.__sigset_t = type { [16 x i64] } + +@env_sigill = internal global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16 + +define void @foo() #0 { +entry: + call void @llvm.eh.sjlj.longjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*)) + unreachable + +; CHECK: @foo +; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha +; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l +; CHECK: ld 31, 0([[REG]]) +; CHECK: ld [[REG2:[0-9]+]], 8([[REG]]) +; CHECK: ld 1, 16([[REG]]) +; CHECK: mtctr [[REG2]] +; CHECK: ld 2, 24([[REG]]) +; CHECK: bctr + +return: ; No predecessors! + ret void +} + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +define signext i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + store i32 0, i32* %retval + %0 = call i8* @llvm.frameaddress(i32 0) + store i8* %0, i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**) + %1 = call i8* @llvm.stacksave() + store i8* %1, i8** getelementptr (i8** bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8**), i32 2) + %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([1 x %struct.__jmp_buf_tag]* @env_sigill to i8*)) + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + store i32 1, i32* %retval + br label %return + +if.else: ; preds = %entry + call void @foo() + br label %if.end + +if.end: ; preds = %if.else + store i32 0, i32* %retval + br label %return + +return: ; preds = %if.end, %if.then + %3 = load i32* %retval + ret i32 %3 + +; FIXME: We should be saving VRSAVE on Darwin, but we're not! + +; CHECK: @main +; CHECK: std +; Make sure that we're not saving VRSAVE on non-Darwin: +; CHECK-NOT: mfspr +; CHECK: stfd +; CHECK: stvx + +; CHECK: addis [[REG:[0-9]+]], 2, env_sigill@toc@ha +; CHECK: std 31, env_sigill@toc@l([[REG]]) +; CHECK: addi [[REG]], [[REG]], env_sigill@toc@l +; CHECK: std [[REG]], [[OFF:[0-9]+]](31) # 8-byte Folded Spill +; CHECK: std 1, 16([[REG]]) +; CHECK: std 2, 24([[REG]]) +; CHECK: bcl 20, 31, .LBB1_1 +; CHECK: li 3, 1 +; CHECK: #EH_SjLj_Setup .LBB1_1 +; CHECK: b .LBB1_2 + +; CHECK: .LBB1_1: +; CHECK: mflr [[REGL:[0-9]+]] +; CHECK: ld [[REG2:[0-9]+]], [[OFF]](31) # 8-byte Folded Reload +; CHECK: std [[REGL]], 8([[REG2]]) +; CHECK: li 3, 0 + +; CHECK: .LBB1_2: + +; CHECK: lfd +; CHECK: lvx +; CHECK: ld +; CHECK: blr + +; CHECK-NOAV: @main +; CHECK-NOAV-NOT: stvx +; CHECK-NOAV: bcl +; CHECK-NOAV: mflr +; CHECK-NOAV: bl foo +; CHECK-NOAV-NOT: lvx +; CHECK-NOAV: blr +} + +declare i8* @llvm.frameaddress(i32) #2 + +declare i8* @llvm.stacksave() #3 + +declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + diff --git a/test/CodeGen/PowerPC/stfiwx-2.ll b/test/CodeGen/PowerPC/stfiwx-2.ll index c49b25cc23..7786fc17ea 100644 --- a/test/CodeGen/PowerPC/stfiwx-2.ll +++ b/test/CodeGen/PowerPC/stfiwx-2.ll @@ -1,11 +1,14 @@ -; This cannot be a stfiwx -; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep stb -; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep stfiwx +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=g5 | FileCheck %s define void @test(float %F, i8* %P) { %I = fptosi float %F to i32 %X = trunc i32 %I to i8 store i8 %X, i8* %P ret void +; CHECK: fctiwz 0, 1 +; CHECK: stfiwx 0, 0, 4 +; CHECK: lwz 4, 12(1) +; CHECK: stb 4, 0(3) +; CHECK: blr } diff --git a/test/CodeGen/PowerPC/store-update.ll b/test/CodeGen/PowerPC/store-update.ll new file mode 100644 index 0000000000..538ed24fbc --- /dev/null +++ b/test/CodeGen/PowerPC/store-update.ll @@ -0,0 +1,170 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define i8* @stbu(i8* %base, i8 zeroext %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i8* %base, i64 16 + store i8 %val, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbu +; CHECK: %entry +; CHECK-NEXT: stbu +; CHECK-NEXT: blr + +define i8* @stbux(i8* %base, i8 zeroext %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i8* %base, i64 %offset + store i8 %val, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbux +; CHECK: %entry +; CHECK-NEXT: stbux +; CHECK-NEXT: blr + +define i16* @sthu(i16* %base, i16 zeroext %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i16* %base, i64 16 + store i16 %val, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthu +; CHECK: %entry +; CHECK-NEXT: sthu +; CHECK-NEXT: blr + +define i16* @sthux(i16* %base, i16 zeroext %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i16* %base, i64 %offset + store i16 %val, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: sthux +; CHECK-NEXT: blr + +define i32* @stwu(i32* %base, i32 zeroext %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i32* %base, i64 16 + store i32 %val, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwu +; CHECK: %entry +; CHECK-NEXT: stwu +; CHECK-NEXT: blr + +define i32* @stwux(i32* %base, i32 zeroext %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i32* %base, i64 %offset + store i32 %val, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: stwux +; CHECK-NEXT: blr + +define i8* @stbu8(i8* %base, i64 %val) nounwind { +entry: + %conv = trunc i64 %val to i8 + %arrayidx = getelementptr inbounds i8* %base, i64 16 + store i8 %conv, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbu +; CHECK: %entry +; CHECK-NEXT: stbu +; CHECK-NEXT: blr + +define i8* @stbux8(i8* %base, i64 %val, i64 %offset) nounwind { +entry: + %conv = trunc i64 %val to i8 + %arrayidx = getelementptr inbounds i8* %base, i64 %offset + store i8 %conv, i8* %arrayidx, align 1 + ret i8* %arrayidx +} +; CHECK: @stbux +; CHECK: %entry +; CHECK-NEXT: stbux +; CHECK-NEXT: blr + +define i16* @sthu8(i16* %base, i64 %val) nounwind { +entry: + %conv = trunc i64 %val to i16 + %arrayidx = getelementptr inbounds i16* %base, i64 16 + store i16 %conv, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthu +; CHECK: %entry +; CHECK-NEXT: sthu +; CHECK-NEXT: blr + +define i16* @sthux8(i16* %base, i64 %val, i64 %offset) nounwind { +entry: + %conv = trunc i64 %val to i16 + %arrayidx = getelementptr inbounds i16* %base, i64 %offset + store i16 %conv, i16* %arrayidx, align 2 + ret i16* %arrayidx +} +; CHECK: @sthux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: sthux +; CHECK-NEXT: blr + +define i32* @stwu8(i32* %base, i64 %val) nounwind { +entry: + %conv = trunc i64 %val to i32 + %arrayidx = getelementptr inbounds i32* %base, i64 16 + store i32 %conv, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwu +; CHECK: %entry +; CHECK-NEXT: stwu +; CHECK-NEXT: blr + +define i32* @stwux8(i32* %base, i64 %val, i64 %offset) nounwind { +entry: + %conv = trunc i64 %val to i32 + %arrayidx = getelementptr inbounds i32* %base, i64 %offset + store i32 %conv, i32* %arrayidx, align 4 + ret i32* %arrayidx +} +; CHECK: @stwux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: stwux +; CHECK-NEXT: blr + +define i64* @stdu(i64* %base, i64 %val) nounwind { +entry: + %arrayidx = getelementptr inbounds i64* %base, i64 16 + store i64 %val, i64* %arrayidx, align 8 + ret i64* %arrayidx +} +; CHECK: @stdu +; CHECK: %entry +; CHECK-NEXT: stdu +; CHECK-NEXT: blr + +define i64* @stdux(i64* %base, i64 %val, i64 %offset) nounwind { +entry: + %arrayidx = getelementptr inbounds i64* %base, i64 %offset + store i64 %val, i64* %arrayidx, align 8 + ret i64* %arrayidx +} +; CHECK: @stdux +; CHECK: %entry +; CHECK-NEXT: sldi +; CHECK-NEXT: stdux +; CHECK-NEXT: blr + diff --git a/test/CodeGen/PowerPC/structsinmem.ll b/test/CodeGen/PowerPC/structsinmem.ll index 884d3a89d1..2a17e740ea 100644 --- a/test/CodeGen/PowerPC/structsinmem.ll +++ b/test/CodeGen/PowerPC/structsinmem.ll @@ -1,9 +1,5 @@ ; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s -; FIXME: The code generation for packed structs is very poor because the -; PowerPC target wrongly rejects all unaligned loads. This test case will -; need to be revised when that is fixed. - target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -118,8 +114,8 @@ entry: ret i32 %add13 ; CHECK: lha {{[0-9]+}}, 126(1) -; CHECK: lbz {{[0-9]+}}, 119(1) ; CHECK: lha {{[0-9]+}}, 132(1) +; CHECK: lbz {{[0-9]+}}, 119(1) ; CHECK: lwz {{[0-9]+}}, 140(1) ; CHECK: lwz {{[0-9]+}}, 144(1) ; CHECK: lwz {{[0-9]+}}, 152(1) @@ -209,19 +205,11 @@ entry: %add13 = add nsw i32 %add11, %6 ret i32 %add13 -; CHECK: lbz {{[0-9]+}}, 149(1) -; CHECK: lbz {{[0-9]+}}, 150(1) -; CHECK: lbz {{[0-9]+}}, 147(1) -; CHECK: lbz {{[0-9]+}}, 148(1) -; CHECK: lbz {{[0-9]+}}, 133(1) -; CHECK: lbz {{[0-9]+}}, 134(1) ; CHECK: lha {{[0-9]+}}, 126(1) +; CHECK: lha {{[0-9]+}}, 133(1) ; CHECK: lbz {{[0-9]+}}, 119(1) ; CHECK: lwz {{[0-9]+}}, 140(1) -; CHECK: lhz {{[0-9]+}}, 154(1) -; CHECK: lhz {{[0-9]+}}, 156(1) -; CHECK: lbz {{[0-9]+}}, 163(1) -; CHECK: lbz {{[0-9]+}}, 164(1) -; CHECK: lbz {{[0-9]+}}, 161(1) -; CHECK: lbz {{[0-9]+}}, 162(1) +; CHECK: lwz {{[0-9]+}}, 147(1) +; CHECK: lwz {{[0-9]+}}, 154(1) +; CHECK: lwz {{[0-9]+}}, 161(1) } diff --git a/test/CodeGen/PowerPC/structsinregs.ll b/test/CodeGen/PowerPC/structsinregs.ll index ef706af95d..54de6060d0 100644 --- a/test/CodeGen/PowerPC/structsinregs.ll +++ b/test/CodeGen/PowerPC/structsinregs.ll @@ -1,9 +1,5 @@ ; RUN: llc -mcpu=pwr7 -O0 -disable-fp-elim < %s | FileCheck %s -; FIXME: The code generation for packed structs is very poor because the -; PowerPC target wrongly rejects all unaligned loads. This test case will -; need to be revised when that is fixed. - target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -63,13 +59,13 @@ entry: %call = call i32 @callee1(%struct.s1* byval %p1, %struct.s2* byval %p2, %struct.s3* byval %p3, %struct.s4* byval %p4, %struct.s5* byval %p5, %struct.s6* byval %p6, %struct.s7* byval %p7) ret i32 %call -; CHECK: ld 9, 128(31) -; CHECK: ld 8, 136(31) -; CHECK: ld 7, 144(31) -; CHECK: lwz 6, 152(31) -; CHECK: lwz 5, 160(31) -; CHECK: lhz 4, 168(31) -; CHECK: lbz 3, 176(31) +; CHECK: ld 9, 112(31) +; CHECK: ld 8, 120(31) +; CHECK: ld 7, 128(31) +; CHECK: lwz 6, 136(31) +; CHECK: lwz 5, 144(31) +; CHECK: lhz 4, 152(31) +; CHECK: lbz 3, 160(31) } declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -109,8 +105,8 @@ entry: ; CHECK: sth 4, 62(1) ; CHECK: stb 3, 55(1) ; CHECK: lha {{[0-9]+}}, 62(1) -; CHECK: lbz {{[0-9]+}}, 55(1) ; CHECK: lha {{[0-9]+}}, 68(1) +; CHECK: lbz {{[0-9]+}}, 55(1) ; CHECK: lwz {{[0-9]+}}, 76(1) ; CHECK: lwz {{[0-9]+}}, 80(1) ; CHECK: lwz {{[0-9]+}}, 88(1) @@ -155,10 +151,10 @@ entry: ; CHECK: ld 9, 96(1) ; CHECK: ld 8, 88(1) ; CHECK: ld 7, 80(1) -; CHECK: lwz 6, 152(31) +; CHECK: lwz 6, 136(31) ; CHECK: ld 5, 64(1) -; CHECK: lhz 4, 168(31) -; CHECK: lbz 3, 176(31) +; CHECK: lhz 4, 152(31) +; CHECK: lbz 3, 160(31) } define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind { @@ -195,19 +191,11 @@ entry: ; CHECK: std 5, 64(1) ; CHECK: sth 4, 62(1) ; CHECK: stb 3, 55(1) -; CHECK: lbz {{[0-9]+}}, 85(1) -; CHECK: lbz {{[0-9]+}}, 86(1) -; CHECK: lbz {{[0-9]+}}, 83(1) -; CHECK: lbz {{[0-9]+}}, 84(1) -; CHECK: lbz {{[0-9]+}}, 69(1) -; CHECK: lbz {{[0-9]+}}, 70(1) ; CHECK: lha {{[0-9]+}}, 62(1) +; CHECK: lha {{[0-9]+}}, 69(1) ; CHECK: lbz {{[0-9]+}}, 55(1) ; CHECK: lwz {{[0-9]+}}, 76(1) -; CHECK: lhz {{[0-9]+}}, 90(1) -; CHECK: lhz {{[0-9]+}}, 92(1) -; CHECK: lbz {{[0-9]+}}, 99(1) -; CHECK: lbz {{[0-9]+}}, 100(1) -; CHECK: lbz {{[0-9]+}}, 97(1) -; CHECK: lbz {{[0-9]+}}, 98(1) +; CHECK: lwz {{[0-9]+}}, 83(1) +; CHECK: lwz {{[0-9]+}}, 90(1) +; CHECK: lwz {{[0-9]+}}, 97(1) } diff --git a/test/CodeGen/PowerPC/stwu8.ll b/test/CodeGen/PowerPC/stwu8.ll index 897bfc6d6c..e0bd043454 100644 --- a/test/CodeGen/PowerPC/stwu8.ll +++ b/test/CodeGen/PowerPC/stwu8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s | FileCheck %s +; RUN: llc -disable-ppc-unaligned < %s | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/test/CodeGen/PowerPC/tls-gd.ll b/test/CodeGen/PowerPC/tls-gd.ll index fb8dfaf04a..5f0ef9a050 100644 --- a/test/CodeGen/PowerPC/tls-gd.ll +++ b/test/CodeGen/PowerPC/tls-gd.ll @@ -18,6 +18,6 @@ entry: ; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha ; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l -; CHECK-NEXT: bl __tls_get_addr(a@tlsgd) +; CHECK: bl __tls_get_addr(a@tlsgd) ; CHECK-NEXT: nop diff --git a/test/CodeGen/PowerPC/tls-ld-2.ll b/test/CodeGen/PowerPC/tls-ld-2.ll index 4954afeb24..4399b330ea 100644 --- a/test/CodeGen/PowerPC/tls-ld-2.ll +++ b/test/CodeGen/PowerPC/tls-ld-2.ll @@ -18,7 +18,7 @@ entry: ; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha ; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l -; CHECK-NEXT: bl __tls_get_addr(a@tlsld) +; CHECK: bl __tls_get_addr(a@tlsld) ; CHECK-NEXT: nop -; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha +; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha ; CHECK-NEXT: lwa {{[0-9]+}}, a@dtprel@l([[REG2]]) diff --git a/test/CodeGen/PowerPC/tls-ld.ll b/test/CodeGen/PowerPC/tls-ld.ll index 1ebc6129e2..db02a56f6a 100644 --- a/test/CodeGen/PowerPC/tls-ld.ll +++ b/test/CodeGen/PowerPC/tls-ld.ll @@ -18,7 +18,7 @@ entry: ; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsld@ha ; CHECK-NEXT: addi 3, [[REG]], a@got@tlsld@l -; CHECK-NEXT: bl __tls_get_addr(a@tlsld) +; CHECK: bl __tls_get_addr(a@tlsld) ; CHECK-NEXT: nop -; CHECK-NEXT: addis [[REG2:[0-9]+]], 3, a@dtprel@ha +; CHECK: addis [[REG2:[0-9]+]], 3, a@dtprel@ha ; CHECK-NEXT: addi {{[0-9]+}}, [[REG2]], a@dtprel@l diff --git a/test/CodeGen/PowerPC/tls.ll b/test/CodeGen/PowerPC/tls.ll index 151b4b7dda..2daa60ab37 100644 --- a/test/CodeGen/PowerPC/tls.ll +++ b/test/CodeGen/PowerPC/tls.ll @@ -12,7 +12,7 @@ entry: ;OPT0: addis [[REG1:[0-9]+]], 13, a@tprel@ha ;OPT0-NEXT: li [[REG2:[0-9]+]], 42 ;OPT0-NEXT: addi [[REG1]], [[REG1]], a@tprel@l -;OPT0-NEXT: stw [[REG2]], 0([[REG1]]) +;OPT0: stw [[REG2]], 0([[REG1]]) ;OPT1: addis [[REG1:[0-9]+]], 13, a@tprel@ha ;OPT1-NEXT: li [[REG2:[0-9]+]], 42 ;OPT1-NEXT: stw [[REG2]], a@tprel@l([[REG1]]) diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll new file mode 100644 index 0000000000..169bd787c0 --- /dev/null +++ b/test/CodeGen/PowerPC/unal4-std.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define fastcc void @copy_to_conceal() #0 { +entry: + br i1 undef, label %if.then, label %if.end210 + +if.then: ; preds = %entry + br label %vector.body.i + +vector.body.i: ; preds = %vector.body.i, %if.then + %index.i = phi i64 [ 0, %vector.body.i ], [ 0, %if.then ] + store <8 x i16> zeroinitializer, <8 x i16>* undef, align 2 + br label %vector.body.i + +if.end210: ; preds = %entry + ret void + +; This will generate two align-1 i64 stores. Make sure that they are +; indexed stores and not in r+i form (which require the offset to be +; a multiple of 4). +; CHECK: @copy_to_conceal +; CHECK: stdx {{[0-9]+}}, 0, +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll new file mode 100644 index 0000000000..d05080338f --- /dev/null +++ b/test/CodeGen/PowerPC/unaligned.ll @@ -0,0 +1,73 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" + +define void @foo1(i16* %p, i16* %r) nounwind { +entry: + %v = load i16* %p, align 1 + store i16 %v, i16* %r, align 1 + ret void + +; CHECK: @foo1 +; CHECK: lhz +; CHECK: sth +} + +define void @foo2(i32* %p, i32* %r) nounwind { +entry: + %v = load i32* %p, align 1 + store i32 %v, i32* %r, align 1 + ret void + +; CHECK: @foo2 +; CHECK: lwz +; CHECK: stw +} + +define void @foo3(i64* %p, i64* %r) nounwind { +entry: + %v = load i64* %p, align 1 + store i64 %v, i64* %r, align 1 + ret void + +; CHECK: @foo3 +; CHECK: ld +; CHECK: std +} + +define void @foo4(float* %p, float* %r) nounwind { +entry: + %v = load float* %p, align 1 + store float %v, float* %r, align 1 + ret void + +; CHECK: @foo4 +; CHECK: lfs +; CHECK: stfs +} + +define void @foo5(double* %p, double* %r) nounwind { +entry: + %v = load double* %p, align 1 + store double %v, double* %r, align 1 + ret void + +; CHECK: @foo5 +; CHECK: lfd +; CHECK: stfd +} + +define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind { +entry: + %v = load <4 x float>* %p, align 1 + store <4 x float> %v, <4 x float>* %r, align 1 + ret void + +; These loads and stores are legalized into aligned loads and stores +; using aligned stack slots. +; CHECK: @foo6 +; CHECK: ld +; CHECK: ld +; CHECK: std +; CHECK: std +} + diff --git a/test/CodeGen/PowerPC/varargs.ll b/test/CodeGen/PowerPC/varargs.ll index 1769be957a..90f0480d6a 100644 --- a/test/CodeGen/PowerPC/varargs.ll +++ b/test/CodeGen/PowerPC/varargs.ll @@ -8,15 +8,16 @@ define i8* @test1(i8** %foo) nounwind { } ; P32: test1: -; P32: lwz r4, 0(r3) -; P32: addi r5, r4, 4 -; P32: stw r5, 0(r3) -; P32: lwz r3, 0(r4) -; P32: blr +; P32: lwz r2, 0(r3) +; P32: addi r4, r2, 4 +; P32: stw r4, 0(r3) +; P32: lwz r3, 0(r2) +; P32: blr ; P64: test1: -; P64: ld r4, 0(r3) -; P64: addi r5, r4, 8 -; P64: std r5, 0(r3) -; P64: ld r3, 0(r4) -; P64: blr +; P64: ld r2, 0(r3) +; P64: addi r4, r2, 8 +; P64: std r4, 0(r3) +; P64: ld r3, 0(r2) +; P64: blr + diff --git a/test/CodeGen/PowerPC/vec_rounding.ll b/test/CodeGen/PowerPC/vec_rounding.ll index f41faa0339..7c55638620 100644 --- a/test/CodeGen/PowerPC/vec_rounding.ll +++ b/test/CodeGen/PowerPC/vec_rounding.ll @@ -13,8 +13,8 @@ define <2 x double> @floor_v2f64(<2 x double> %p) ret <2 x double> %t } ; CHECK: floor_v2f64: -; CHECK: bl floor -; CHECK: bl floor +; CHECK: frim +; CHECK: frim declare <4 x double> @llvm.floor.v4f64(<4 x double> %p) define <4 x double> @floor_v4f64(<4 x double> %p) @@ -23,10 +23,10 @@ define <4 x double> @floor_v4f64(<4 x double> %p) ret <4 x double> %t } ; CHECK: floor_v4f64: -; CHECK: bl floor -; CHECK: bl floor -; CHECK: bl floor -; CHECK: bl floor +; CHECK: frim +; CHECK: frim +; CHECK: frim +; CHECK: frim declare <2 x double> @llvm.ceil.v2f64(<2 x double> %p) define <2 x double> @ceil_v2f64(<2 x double> %p) @@ -35,8 +35,8 @@ define <2 x double> @ceil_v2f64(<2 x double> %p) ret <2 x double> %t } ; CHECK: ceil_v2f64: -; CHECK: bl ceil -; CHECK: bl ceil +; CHECK: frip +; CHECK: frip declare <4 x double> @llvm.ceil.v4f64(<4 x double> %p) define <4 x double> @ceil_v4f64(<4 x double> %p) @@ -45,10 +45,10 @@ define <4 x double> @ceil_v4f64(<4 x double> %p) ret <4 x double> %t } ; CHECK: ceil_v4f64: -; CHECK: bl ceil -; CHECK: bl ceil -; CHECK: bl ceil -; CHECK: bl ceil +; CHECK: frip +; CHECK: frip +; CHECK: frip +; CHECK: frip declare <2 x double> @llvm.trunc.v2f64(<2 x double> %p) define <2 x double> @trunc_v2f64(<2 x double> %p) @@ -57,8 +57,8 @@ define <2 x double> @trunc_v2f64(<2 x double> %p) ret <2 x double> %t } ; CHECK: trunc_v2f64: -; CHECK: bl trunc -; CHECK: bl trunc +; CHECK: friz +; CHECK: friz declare <4 x double> @llvm.trunc.v4f64(<4 x double> %p) define <4 x double> @trunc_v4f64(<4 x double> %p) @@ -67,10 +67,10 @@ define <4 x double> @trunc_v4f64(<4 x double> %p) ret <4 x double> %t } ; CHECK: trunc_v4f64: -; CHECK: bl trunc -; CHECK: bl trunc -; CHECK: bl trunc -; CHECK: bl trunc +; CHECK: friz +; CHECK: friz +; CHECK: friz +; CHECK: friz declare <2 x double> @llvm.nearbyint.v2f64(<2 x double> %p) define <2 x double> @nearbyint_v2f64(<2 x double> %p) diff --git a/test/CodeGen/PowerPC/vrsave-spill.ll b/test/CodeGen/PowerPC/vrsave-spill.ll new file mode 100644 index 0000000000..c73206d8fc --- /dev/null +++ b/test/CodeGen/PowerPC/vrsave-spill.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -mtriple=powerpc64-apple-darwin -mcpu=g5 | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-apple-darwin" + +define <4 x float> @foo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %c = fadd <4 x float> %a, %b + %d = fmul <4 x float> %c, %a + call void asm sideeffect "", "~{VRsave}"() nounwind + br label %return + +; CHECK: @foo +; CHECK: mfspr r{{[0-9]+}}, 256 +; CHECK: mtspr 256, r{{[0-9]+}} + +return: ; preds = %entry + ret <4 x float> %d +} + diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll index 7641017c43..9fb3d03477 100644 --- a/test/CodeGen/PowerPC/vrspill.ll +++ b/test/CodeGen/PowerPC/vrspill.ll @@ -13,7 +13,7 @@ entry: ret void } -; CHECK: stvx 2, 0, 0 -; CHECK: lvx 2, 0, 0 +; CHECK: stvx 2, 1, +; CHECK: lvx 2, 1, declare void @foo(i32*) diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll index a58674269a..09baee7a1d 100644 --- a/test/CodeGen/R600/disconnected-predset-break-bug.ll +++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll @@ -5,9 +5,10 @@ ; and the PREDICATE_BREAK in this loop. ; CHECK: @loop_ge -; CHECK: WHILE +; CHECK: LOOP_START_DX10 ; CHECK: PRED_SET -; CHECK-NEXT: PREDICATED_BREAK +; CHECK-NEXT: JUMP +; CHECK-NEXT: LOOP_BREAK define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) nounwind { entry: %cmp5 = icmp sgt i32 %iterations, 0 diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll index 82fb925c0d..3d70e4bd54 100644 --- a/test/CodeGen/R600/kcache-fold.ll +++ b/test/CodeGen/R600/kcache-fold.ll @@ -1,8 +1,8 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}} - -define void @main() { +; CHECK: @main1 +; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}} +define void @main1() { main_body: %0 = load <4 x float> addrspace(8)* null %1 = extractelement <4 x float> %0, i32 0 @@ -48,5 +48,53 @@ main_body: ret void } +; CHECK: @main2 +; CHECK-NOT: MOV +define void @main2() { +main_body: + %0 = load <4 x float> addrspace(8)* null + %1 = extractelement <4 x float> %0, i32 0 + %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %3 = extractelement <4 x float> %2, i32 0 + %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %5 = extractelement <4 x float> %4, i32 1 + %6 = fcmp ult float %1, 0.000000e+00 + %7 = select i1 %6, float %3, float %5 + %8 = load <4 x float> addrspace(8)* null + %9 = extractelement <4 x float> %8, i32 1 + %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %11 = extractelement <4 x float> %10, i32 0 + %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %13 = extractelement <4 x float> %12, i32 1 + %14 = fcmp ult float %9, 0.000000e+00 + %15 = select i1 %14, float %11, float %13 + %16 = load <4 x float> addrspace(8)* null + %17 = extractelement <4 x float> %16, i32 2 + %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %19 = extractelement <4 x float> %18, i32 3 + %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %21 = extractelement <4 x float> %20, i32 2 + %22 = fcmp ult float %17, 0.000000e+00 + %23 = select i1 %22, float %19, float %21 + %24 = load <4 x float> addrspace(8)* null + %25 = extractelement <4 x float> %24, i32 3 + %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %27 = extractelement <4 x float> %26, i32 3 + %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %29 = extractelement <4 x float> %28, i32 2 + %30 = fcmp ult float %25, 0.000000e+00 + %31 = select i1 %30, float %27, float %29 + %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00) + %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) + %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00) + %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00) + %36 = insertelement <4 x float> undef, float %32, i32 0 + %37 = insertelement <4 x float> %36, float %33, i32 1 + %38 = insertelement <4 x float> %37, float %34, i32 2 + %39 = insertelement <4 x float> %38, float %35, i32 3 + call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0) + ret void +} + declare float @llvm.AMDIL.clamp.(float, float, float) readnone declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll index a8f604ac6d..bf0cdaa2fa 100644 --- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll +++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll @@ -14,7 +14,7 @@ main_body: declare void @llvm.AMDGPU.shader.type(i32) -declare float @llvm.SI.fs.constant(i32, i32, i32) readonly +declare float @llvm.SI.fs.constant(i32, i32, i32) readnone declare i32 @llvm.SI.packf16(float, float) readnone diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll index d397f3b678..c724395b98 100644 --- a/test/CodeGen/R600/llvm.SI.sample.ll +++ b/test/CodeGen/R600/llvm.SI.sample.ll @@ -1,71 +1,106 @@ ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s ;CHECK: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE_C -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE_C -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE_C -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE_C -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE_C -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE_C -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE -;CHECK-NEXT: S_WAITCNT 1904 -;CHECK-NEXT: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE_C +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE +;CHECK: IMAGE_SAMPLE -define void @test() { - %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, +define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) { + %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0 + %v2 = insertelement <4 x i32> undef, i32 %a1, i32 1 + %v3 = insertelement <4 x i32> undef, i32 %a1, i32 2 + %v4 = insertelement <4 x i32> undef, i32 %a1, i32 3 + %v5 = insertelement <4 x i32> undef, i32 %a2, i32 0 + %v6 = insertelement <4 x i32> undef, i32 %a2, i32 1 + %v7 = insertelement <4 x i32> undef, i32 %a2, i32 2 + %v8 = insertelement <4 x i32> undef, i32 %a2, i32 3 + %v9 = insertelement <4 x i32> undef, i32 %a3, i32 0 + %v10 = insertelement <4 x i32> undef, i32 %a3, i32 1 + %v11 = insertelement <4 x i32> undef, i32 %a3, i32 2 + %v12 = insertelement <4 x i32> undef, i32 %a3, i32 3 + %v13 = insertelement <4 x i32> undef, i32 %a4, i32 0 + %v14 = insertelement <4 x i32> undef, i32 %a4, i32 1 + %v15 = insertelement <4 x i32> undef, i32 %a4, i32 2 + %v16 = insertelement <4 x i32> undef, i32 %a4, i32 3 + %res1 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v1, <8 x i32> undef, <4 x i32> undef, i32 1) - %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res2 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v2, <8 x i32> undef, <4 x i32> undef, i32 2) - %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res3 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v3, <8 x i32> undef, <4 x i32> undef, i32 3) - %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res4 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v4, <8 x i32> undef, <4 x i32> undef, i32 4) - %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res5 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v5, <8 x i32> undef, <4 x i32> undef, i32 5) - %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res6 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v6, <8 x i32> undef, <4 x i32> undef, i32 6) - %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res7 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v7, <8 x i32> undef, <4 x i32> undef, i32 7) - %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res8 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v8, <8 x i32> undef, <4 x i32> undef, i32 8) - %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res9 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v9, <8 x i32> undef, <4 x i32> undef, i32 9) - %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res10 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v10, <8 x i32> undef, <4 x i32> undef, i32 10) - %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res11 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v11, <8 x i32> undef, <4 x i32> undef, i32 11) - %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res12 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v12, <8 x i32> undef, <4 x i32> undef, i32 12) - %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res13 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v13, <8 x i32> undef, <4 x i32> undef, i32 13) - %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res14 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v14, <8 x i32> undef, <4 x i32> undef, i32 14) - %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res15 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v15, <8 x i32> undef, <4 x i32> undef, i32 15) - %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> undef, + %res16 = call <4 x float> @llvm.SI.sample.(i32 15, <4 x i32> %v16, <8 x i32> undef, <4 x i32> undef, i32 16) + %e1 = extractelement <4 x float> %res1, i32 0 + %e2 = extractelement <4 x float> %res2, i32 0 + %e3 = extractelement <4 x float> %res3, i32 0 + %e4 = extractelement <4 x float> %res4, i32 0 + %e5 = extractelement <4 x float> %res5, i32 0 + %e6 = extractelement <4 x float> %res6, i32 0 + %e7 = extractelement <4 x float> %res7, i32 0 + %e8 = extractelement <4 x float> %res8, i32 0 + %e9 = extractelement <4 x float> %res9, i32 0 + %e10 = extractelement <4 x float> %res10, i32 0 + %e11 = extractelement <4 x float> %res11, i32 0 + %e12 = extractelement <4 x float> %res12, i32 0 + %e13 = extractelement <4 x float> %res13, i32 0 + %e14 = extractelement <4 x float> %res14, i32 0 + %e15 = extractelement <4 x float> %res15, i32 0 + %e16 = extractelement <4 x float> %res16, i32 0 + %s1 = fadd float %e1, %e2 + %s2 = fadd float %s1, %e3 + %s3 = fadd float %s2, %e4 + %s4 = fadd float %s3, %e5 + %s5 = fadd float %s4, %e6 + %s6 = fadd float %s5, %e7 + %s7 = fadd float %s6, %e8 + %s8 = fadd float %s7, %e9 + %s9 = fadd float %s8, %e10 + %s10 = fadd float %s9, %e11 + %s11 = fadd float %s10, %e12 + %s12 = fadd float %s11, %e13 + %s13 = fadd float %s12, %e14 + %s14 = fadd float %s13, %e15 + %s15 = fadd float %s14, %e16 + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %s15, float %s15, float %s15, float %s15) ret void } -declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/llvm.pow.ll b/test/CodeGen/R600/llvm.pow.ll index 0ae9172579..b4ce9f429f 100644 --- a/test/CodeGen/R600/llvm.pow.ll +++ b/test/CodeGen/R600/llvm.pow.ll @@ -1,7 +1,7 @@ ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;CHECK: LOG_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;CHECK-NEXT: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} +;CHECK-NEXT: MUL NON-IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK-NEXT: EXP_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} define void @test() { diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll new file mode 100644 index 0000000000..423adb9da9 --- /dev/null +++ b/test/CodeGen/R600/lshl.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_LSHLREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = mul i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll new file mode 100644 index 0000000000..551eac1d76 --- /dev/null +++ b/test/CodeGen/R600/lshr.ll @@ -0,0 +1,14 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 2 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll new file mode 100644 index 0000000000..28744e00c3 --- /dev/null +++ b/test/CodeGen/R600/mulhu.ll @@ -0,0 +1,16 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_MOV_B32_e32 VGPR1, -1431655765 +;CHECK-NEXT: V_MUL_HI_U32 VGPR0, VGPR0, VGPR1, 0, 0, 0, 0, 0 +;CHECK-NEXT: V_LSHRREV_B32_e32 VGPR0, 1, VGPR0 + +define void @test(i32 %p) { + %i = udiv i32 %p, 3 + %r = bitcast i32 %i to float + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare <4 x float> @llvm.SI.sample.(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32) readnone + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll index 18895a423e..eb8b052b6f 100644 --- a/test/CodeGen/R600/predicates.ll +++ b/test/CodeGen/R600/predicates.ll @@ -45,10 +45,12 @@ ENDIF: } ; CHECK: @nested_if -; CHECK: IF_PREDICATE_SET +; CHECK: ALU_PUSH_BEFORE +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec +; CHECK: JUMP ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, ; CHECK: LSHL T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: ENDIF +; CHECK: POP define void @nested_if(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 @@ -70,11 +72,13 @@ ENDIF: } ; CHECK: @nested_if_else -; CHECK: IF_PREDICATE_SET +; CHECK: ALU_PUSH_BEFORE +; CHECK: PRED_SET{{[EGN][ET]*}}_INT Exec +; CHECK: JUMP ; CHECK: PRED_SET{{[EGN][ET]*}}_INT Pred, ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel ; CHECK: LSH{{[LR] T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, 0(0.000000e+00) Pred_sel -; CHECK: ENDIF +; CHECK: POP define void @nested_if_else(i32 addrspace(1)* %out, i32 %in) { entry: %0 = icmp sgt i32 %in, 0 diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll new file mode 100644 index 0000000000..5ab4b87d57 --- /dev/null +++ b/test/CodeGen/R600/seto.ll @@ -0,0 +1,13 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_CMP_O_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 + +define void @main(float %p) { +main_body: + %c = fcmp oeq float %p, %p + %r = select i1 %c, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll new file mode 100644 index 0000000000..320835576d --- /dev/null +++ b/test/CodeGen/R600/setuo.ll @@ -0,0 +1,13 @@ +;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s + +;CHECK: V_CMP_U_F32_e64 SGPR0_SGPR1, VGPR0, VGPR0, 0, 0, 0, 0 + +define void @main(float %p) { +main_body: + %c = fcmp une float %p, %p + %r = select i1 %c, float 1.000000e+00, float 0.000000e+00 + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %r, float %r, float %r, float %r) + ret void +} + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) diff --git a/test/CodeGen/SPARC/DbgValueOtherTargets.test b/test/CodeGen/SPARC/DbgValueOtherTargets.test index 749a7b5346..a669bf848d 100644 --- a/test/CodeGen/SPARC/DbgValueOtherTargets.test +++ b/test/CodeGen/SPARC/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=sparc -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=sparc -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg index 786fee9e66..6f30a87979 100644 --- a/test/CodeGen/SPARC/lit.local.cfg +++ b/test/CodeGen/SPARC/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'Sparc' in targets: diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index 498c78165e..d6b6495691 100644 --- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -47,8 +47,8 @@ declare double @sqrt(double) nounwind readonly declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !0 = metadata !{i32 46, i32 0, metadata !1, null} -!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] -!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!1 = metadata !{i32 524299, metadata !4, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ] +!2 = metadata !{i32 524299, metadata !4, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ] !3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] !4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ] !5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] @@ -140,8 +140,8 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ] !92 = metadata !{i32 48, i32 0, metadata !1, null} !93 = metadata !{i32 218, i32 0, metadata !94, metadata !96} -!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] -!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!94 = metadata !{i32 524299, metadata !4, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ] +!95 = metadata !{i32 524299, metadata !4, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ] !96 = metadata !{i32 51, i32 0, metadata !1, null} !97 = metadata !{i32 227, i32 0, metadata !94, metadata !96} !98 = metadata !{i32 52, i32 0, metadata !1, null} diff --git a/test/CodeGen/Thumb/DbgValueOtherTargets.test b/test/CodeGen/Thumb/DbgValueOtherTargets.test index aa31a7c75c..afb18a43be 100644 --- a/test/CodeGen/Thumb/DbgValueOtherTargets.test +++ b/test/CodeGen/Thumb/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=thumb -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg index cb77b09ef4..4d75f581a1 100644 --- a/test/CodeGen/Thumb/lit.local.cfg +++ b/test/CodeGen/Thumb/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'ARM' in targets: diff --git a/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll index 0af2445d7f..2e4cb1fe7e 100644 --- a/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll +++ b/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; The old instruction selector used to load all arguments to a call up in ; registers, then start pushing them all onto the stack. This is bad news as ; it makes a ton of annoying overlapping live ranges. This code should not diff --git a/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll index 1a3d74918d..7673124d5d 100644 --- a/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll +++ b/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | \ ; RUN: grep asm-printer | grep 7 diff --git a/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll index 5cba3efeef..faa3e21a93 100644 --- a/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll +++ b/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \ ; RUN: not grep "Number of register spills" ; END. diff --git a/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/2006-05-02-InstrSched1.ll index 1c75f93915..0afddd8f87 100644 --- a/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched1.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ ; RUN: grep asm-printer | grep 14 ; diff --git a/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/2006-05-02-InstrSched2.ll index 95eefa1e71..222b7a0b41 100644 --- a/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll +++ b/test/CodeGen/X86/2006-05-02-InstrSched2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | \ ; RUN: grep asm-printer | grep 13 diff --git a/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index 37c510786a..6912351d7b 100644 --- a/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \ ; RUN: grep "asm-printer" | grep 35 diff --git a/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll index a1b973d7cc..363a6008a0 100644 --- a/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll +++ b/test/CodeGen/X86/2008-02-18-TailMergingBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16 ; PR1909 diff --git a/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index b2cf34cd20..0310a5dcb5 100644 --- a/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s ; Now this test spills one register. But a reload in the loop is cheaper than ; the divsd so it's a win. diff --git a/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/2009-02-25-CommuteBug.ll index 9cbf350940..9ea34e27a1 100644 --- a/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll +++ b/test/CodeGen/X86/2009-02-25-CommuteBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted ; rdar://6608609 diff --git a/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index d50fe6f73a..68a9fafb6d 100644 --- a/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm" ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s ; rdar://6627786 diff --git a/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll index d934ec9a88..351a1722a2 100644 --- a/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll +++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t ; RUN: not grep spill %t ; RUN: not grep "%rsp" %t diff --git a/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll index ad18a0c5b9..0607eda271 100644 --- a/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll +++ b/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded" ; XFAIL: * ; 69408 removed the opportunity for this optimization to work diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll index 7f2b8206b1..7dba332b1b 100644 --- a/test/CodeGen/X86/2010-01-18-DbgValue.ll +++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll @@ -29,23 +29,24 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} -!18 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !18, null} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !7} -!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 786451, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9, metadata !14} -!9 = metadata !{i32 786445, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786451, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] !11 = metadata !{metadata !12, metadata !13} -!12 = metadata !{i32 786445, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!13 = metadata !{i32 786445, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] -!14 = metadata !{i32 786445, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] !15 = metadata !{i32 11, i32 0, metadata !1, null} !16 = metadata !{i32 12, i32 0, metadata !17, null} !17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] +!18 = metadata !{metadata !1} +!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"} diff --git a/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/2010-01-19-OptExtBug.ll index eb4a5c04a2..ec24e73c34 100644 --- a/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll +++ b/test/CodeGen/X86/2010-01-19-OptExtBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp { diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll index 5707befb9c..8ab93fcb97 100644 --- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll @@ -199,25 +199,23 @@ declare float @copysignf(float, float) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} -!43 = metadata !{metadata !0, metadata !11, metadata !12, metadata !13, metadata !14, metadata !16, metadata !17, metadata !18} -!44 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !44, null} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786478, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", metadata !2, i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !44, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9} -!6 = metadata !{i32 786454, metadata !7, metadata !"SCtype", metadata !7, i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] -!7 = metadata !{i32 786473, metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc", metadata !3} ; [ DW_TAG_file_type ] -!8 = metadata !{i32 786468, metadata !2, metadata !"complex float", metadata !2, i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 786454, metadata !7, metadata !"SFtype", metadata !7, i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] -!10 = metadata !{i32 786468, metadata !2, metadata !"float", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"complex float", i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SFtype", i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!10 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] !11 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !12 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !13 = metadata !{i32 786689, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !14 = metadata !{i32 786688, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 786443, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 786443, metadata !2, metadata !1, i32 1922, i32 0} ; [ DW_TAG_lexical_block ] !16 = metadata !{i32 786688, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !17 = metadata !{i32 786688, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !18 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -245,3 +243,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !40 = metadata !{i32 1964, i32 0, metadata !15, null} !41 = metadata !{i32 1965, i32 0, metadata !15, null} !42 = metadata !{i32 1969, i32 0, metadata !15, null} +!43 = metadata !{metadata !0, metadata !11, metadata !12, metadata !13, metadata !14, metadata !16, metadata !17, metadata !18} +!44 = metadata !{metadata !1} +!45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"} +!46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"} diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll index fa1e426a8f..6519ca063a 100644 --- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll +++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll @@ -22,22 +22,17 @@ declare void @foo(i32) nounwind optsize noinline ssp declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!31 = metadata !{metadata !0} -!32 = metadata !{metadata !5, metadata !9, metadata !19} -!33 = metadata !{metadata !4} -!34 = metadata !{metadata !8} -!35 = metadata !{metadata !18, metadata !25, metadata !26} !0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null} ; [ DW_TAG_variable ] -!1 = metadata !{i32 786473, metadata !"foo.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !32, metadata !31, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ] -!5 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] !7 = metadata !{null, metadata !3} !8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] -!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{metadata !12, metadata !13} !12 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] @@ -47,7 +42,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !16 = metadata !{i32 786445, metadata !14, metadata !"c", metadata !1, i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ] !17 = metadata !{i32 786445, metadata !14, metadata !"d", metadata !1, i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ] !18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ] +!19 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ] !20 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null} ; [ DW_TAG_subroutine_type ] !21 = metadata !{metadata !3, metadata !3, metadata !22} !22 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] @@ -55,10 +50,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !24 = metadata !{i32 786468, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !25 = metadata !{i32 786689, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22, i32 0, null} ; [ DW_TAG_arg_variable ] !26 = metadata !{i32 786688, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14, i32 0, null} ; [ DW_TAG_auto_variable ] -!27 = metadata !{i32 786443, metadata !19, i32 22, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 786443, metadata !36, metadata !19, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !28 = metadata !{i32 18, i32 0, metadata !29, null} -!29 = metadata !{i32 786443, metadata !9, i32 17, i32 0} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 786443, metadata !36, metadata !9, i32 17, i32 0, i32 1} ; [ DW_TAG_lexical_block ] !30 = metadata !{i32 19, i32 0, metadata !29, null} +!31 = metadata !{metadata !0} +!32 = metadata !{metadata !5, metadata !9, metadata !19} +!33 = metadata !{metadata !4} +!34 = metadata !{metadata !8} +!35 = metadata !{metadata !18, metadata !25, metadata !26} +!36 = metadata !{metadata !"foo.c", metadata !"/tmp/"} ; The variable bar:myvar changes registers after the first movq. ; It is cobbered by popq %rbx diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll index e1154e340d..4ea3bf0778 100644 --- a/test/CodeGen/X86/2010-05-28-Crash.ll +++ b/test/CodeGen/X86/2010-05-28-Crash.ll @@ -23,25 +23,26 @@ entry: } !llvm.dbg.cu = !{!3} -!15 = metadata !{metadata !0} -!16 = metadata !{metadata !7} -!17 = metadata !{metadata !1, metadata !8} !0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"f.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"f.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !17, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !6} !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!8 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ] !9 = metadata !{i32 3, i32 0, metadata !10, null} -!10 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 1} !12 = metadata !{i32 3, i32 0, metadata !10, metadata !13} !13 = metadata !{i32 7, i32 0, metadata !14, null} -!14 = metadata !{i32 786443, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ] +!14 = metadata !{i32 786443, metadata !2, metadata !8, i32 6, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{metadata !0} +!16 = metadata !{metadata !7} +!17 = metadata !{metadata !1, metadata !8} +!18 = metadata !{metadata !"f.c", metadata !"/tmp"} ;CHECK: DEBUG_VALUE: bar:x <- E ;CHECK: Ltmp diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll index 898f808128..b764b0b345 100644 --- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll +++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll @@ -22,14 +22,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28} !0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", metadata !3, i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786451, metadata !3, metadata !"foo", metadata !3, i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_structure_type ] -!3 = metadata !{i32 786473, metadata !"foo.cp", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] !4 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cp", metadata !"/tmp/", metadata !"4.2.1 LLVM build", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] !5 = metadata !{metadata !6, metadata !1, metadata !8} !6 = metadata !{i32 786445, metadata !2, metadata !"y", metadata !3, i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] !7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 786478, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", metadata !3, i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !3, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ] !9 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_subroutine_type ] !10 = metadata !{metadata !7, metadata !11, metadata !7} !11 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ] @@ -39,7 +39,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !15 = metadata !{i32 786689, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] !16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] !17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] -!18 = metadata !{i32 786478, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 786478, metadata !3, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ] !19 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null} ; [ DW_TAG_subroutine_type ] !20 = metadata !{metadata !7, metadata !7, metadata !21} !21 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] @@ -52,3 +52,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !28 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] !29 = metadata !{i32 16, i32 0, metadata !30, null} !30 = metadata !{i32 786443, metadata !8, i32 15, i32 0} ; [ DW_TAG_lexical_block ] +!31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"} diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll index 4ae6996c02..aaa562a439 100644 --- a/test/CodeGen/X86/2010-08-04-StackVariable.ll +++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll @@ -77,27 +77,27 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!3} !46 = metadata !{metadata !0, metadata !9, metadata !16, metadata !17, metadata !20} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786451, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] !2 = metadata !{i32 786473, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !46, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786449, i32 4, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !46, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} !5 = metadata !{i32 786445, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !6 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] !7 = metadata !{i32 786445, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] !8 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786478, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ] !10 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{null, metadata !12, metadata !13} !12 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] !13 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] !15 = metadata !{null, metadata !12} -!16 = metadata !{i32 786478, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ] -!17 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ] +!16 = metadata !{i32 786478, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ] !18 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] !19 = metadata !{metadata !13, metadata !13, metadata !1} -!20 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ] !21 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] !22 = metadata !{metadata !13} !23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -105,7 +105,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ] !26 = metadata !{i32 786448, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] !27 = metadata !{i32 17, i32 0, metadata !28, null} -!28 = metadata !{i32 786443, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ] !29 = metadata !{i32 18, i32 0, metadata !28, null} !30 = metadata !{i32 20, i32 0, metadata !28, null} !31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, null} ; [ DW_TAG_arg_variable ] @@ -113,11 +113,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !33 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ] !34 = metadata !{i32 11, i32 0, metadata !16, null} !35 = metadata !{i32 11, i32 0, metadata !36, null} -!36 = metadata !{i32 786443, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] -!37 = metadata !{i32 786443, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!36 = metadata !{i32 786443, metadata !2, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 786443, metadata !2, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, null} ; [ DW_TAG_auto_variable ] -!39 = metadata !{i32 786443, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] -!40 = metadata !{i32 786443, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!39 = metadata !{i32 786443, metadata !2, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 786443, metadata !2, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ] !41 = metadata !{i32 24, i32 0, metadata !39, null} !42 = metadata !{i32 25, i32 0, metadata !39, null} !43 = metadata !{i32 26, i32 0, metadata !39, null} diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll index 60fb68b791..de0d216e26 100644 --- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll +++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll @@ -13,18 +13,20 @@ entry: } !llvm.dbg.cu = !{!2} -!13 = metadata !{metadata !0, metadata !6} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"bug.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114084)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !13, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, metadata !13, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786473, metadata !"bug.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!6 = metadata !{i32 786478, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", metadata !7, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] !8 = metadata !{i32 53, i32 13, metadata !9, null} !9 = metadata !{i32 786443, metadata !0, i32 53, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] !10 = metadata !{i32 4, i32 13, metadata !11, null} !11 = metadata !{i32 786443, metadata !12, i32 4, i32 13, metadata !7, i32 2} ; [ DW_TAG_lexical_block ] !12 = metadata !{i32 786443, metadata !6, i32 4, i32 11, metadata !7, i32 1} ; [ DW_TAG_lexical_block ] +!13 = metadata !{metadata !0, metadata !6} +!14 = metadata !{metadata !"", metadata !"/private/tmp"} +!15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll index 9cf2747b77..31a6822b34 100644 --- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll +++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll @@ -16,12 +16,10 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!15 = metadata !{metadata !0} -!16 = metadata !{metadata !6} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"one.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 117922)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !15, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, null, null, metadata !15, null, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -34,3 +32,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !12 = metadata !{i32 3, i32 47, metadata !0, null} !13 = metadata !{i32 4, i32 2, metadata !14, null} !14 = metadata !{i32 786443, metadata !0, i32 3, i32 50, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{metadata !0} +!16 = metadata !{metadata !6} +!17 = metadata !{metadata !"one.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/2010-12-02-MC-Set.ll b/test/CodeGen/X86/2010-12-02-MC-Set.ll index 628e5ba606..4d8d974f70 100644 --- a/test/CodeGen/X86/2010-12-02-MC-Set.ll +++ b/test/CodeGen/X86/2010-12-02-MC-Set.ll @@ -9,9 +9,9 @@ entry: !llvm.dbg.cu = !{!2} !7 = metadata !{metadata !0} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 786473, metadata !"e.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"e.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 120563)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !7, null} ; [ DW_TAG_compile_unit ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 120563)", i1 false, metadata !"", i32 0, null, null, metadata !7, null, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} !5 = metadata !{i32 5, i32 1, metadata !6, null} diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll index 3d3fff9604..2355528a81 100644 --- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll +++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll @@ -70,35 +70,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare i32 @puts(i8* nocapture) nounwind !llvm.dbg.cu = !{!2} -!28 = metadata !{metadata !0, metadata !6} -!29 = metadata !{metadata !10, metadata !11, metadata !12} -!30 = metadata !{metadata !14, metadata !17} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"rem_small.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"rem_small.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124117)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 786468, metadata !2, metadata !"long int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786453, metadata !1, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !11 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !12 = metadata !{i32 786688, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] -!13 = metadata !{i32 786443, metadata !0, i32 5, i32 52, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 786443, metadata !1, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ] !14 = metadata !{i32 786688, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0, null} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 786443, metadata !6, i32 25, i32 12, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!15 = metadata !{i32 786443, metadata !1, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !17 = metadata !{i32 786688, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !18 = metadata !{i32 5, i32 41, metadata !0, null} !19 = metadata !{i32 5, i32 49, metadata !0, null} !20 = metadata !{i32 7, i32 5, metadata !13, null} !21 = metadata !{i32 8, i32 9, metadata !22, null} -!22 = metadata !{i32 786443, metadata !13, i32 7, i32 14, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 786443, metadata !1, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ] !23 = metadata !{i32 9, i32 9, metadata !22, null} !24 = metadata !{i32 26, i32 38, metadata !15, null} !25 = metadata !{i32 27, i32 38, metadata !15, null} !26 = metadata !{i32 28, i32 9, metadata !15, null} !27 = metadata !{i32 30, i32 1, metadata !15, null} +!28 = metadata !{metadata !0, metadata !6} +!29 = metadata !{metadata !10, metadata !11, metadata !12} +!30 = metadata !{metadata !14, metadata !17} +!31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll index 47ef693cc2..6f43b94b26 100644 --- a/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll +++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s ; ; This test should not cause any spilling with RAFast. diff --git a/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/test/CodeGen/X86/2011-09-14-valcoalesce.ll index a5ec614a94..54d2b40350 100644 --- a/test/CodeGen/X86/2011-09-14-valcoalesce.ll +++ b/test/CodeGen/X86/2011-09-14-valcoalesce.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -disable-code-place | FileCheck %s +; RUN: llc < %s -march=x86 -disable-block-placement | FileCheck %s ; ; Test RegistersDefinedFromSameValue. We have multiple copies of the same vreg: ; while.body85.i: diff --git a/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll index 18a3313773..90d8d3d2dd 100644 --- a/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll +++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \ ; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc" diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll index f149e4a11e..9525653f3f 100644 --- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll @@ -42,9 +42,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !2 = metadata !{i32 0} !3 = metadata !{null} !4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725] -!5 = metadata !{i32 786473, metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh", null} ; [ DW_TAG_file_type ] -!6 = metadata !{i32 786454, null, metadata !"hgstruct", metadata !5, i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ] -!7 = metadata !{i32 786451, null, metadata !"", metadata !5, i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ] +!5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ] +!6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ] !8 = metadata !{metadata !"short", metadata !9} !9 = metadata !{metadata !"omnipotent char", metadata !10} !10 = metadata !{metadata !"Simple C/C++ TBAA"} +!11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"} diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll index f171c16df3..a0fbbb2ff9 100644 --- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll @@ -78,11 +78,12 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) !11 = metadata !{i32 786443, metadata !12, i32 250, i32 0, metadata !14, i32 24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] !12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] !13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] -!14 = metadata !{i32 786473, metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset", null} ; [ DW_TAG_file_type ] -!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char] -!16 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char] +!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] !17 = metadata !{metadata !18} !18 = metadata !{i32 786465, i64 0, i64 20} ; [ DW_TAG_subrange_type ] [0, 19] +!19 = metadata !{metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset"} ; Test DebugValue uses visited by RegisterPressureTracker findUseBetween(). ; @@ -102,7 +103,7 @@ cond.true: ; preds = %entry unreachable cond.end: ; preds = %entry - call void @llvm.dbg.declare(metadata !{%"class.__gnu_cxx::hash_map"* %X}, metadata !21) + call void @llvm.dbg.declare(metadata !{%"class.__gnu_cxx::hash_map"* %X}, metadata !31) %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5 invoke void @_Znwm() to label %exit.i unwind label %lpad2.i.i.i.i @@ -126,9 +127,10 @@ declare i32 @__gxx_personality_v0(...) declare void @_Znwm() -!llvm.dbg.cu = !{!20} +!llvm.dbg.cu = !{!30} -!20 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus] -!21 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29] -!22 = metadata !{i32 786454, null, metadata !"HM", metadata !23, i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ] -!23 = metadata !{i32 786473, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", null} ; [ DW_TAG_file_type ] +!30 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus] +!31 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !32, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29] +!32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ] +!33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ] +!34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"} diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll index d290d514cc..df93c5647d 100644 --- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll +++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll @@ -39,5 +39,6 @@ invoke.cont44: ; preds = %if.end !1 = metadata !{metadata !2} !2 = metadata !{null, null} !3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214] -!4 = metadata !{i32 786451, null, metadata !"btCompoundLeafCallback", metadata !5, i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ] -!5 = metadata !{i32 786473, metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet", null} ; [ DW_TAG_file_type ] +!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ] +!5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ] +!6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"} diff --git a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll index db7ec8ae26..1b417e54a2 100644 --- a/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll +++ b/test/CodeGen/X86/2013-01-09-DAGCombineBug.ll @@ -39,3 +39,36 @@ define void @t() nounwind uwtable ssp { ; <label>:11 ; preds = %11, %4 br label %11 } + +; PR15608 +@global = external constant [2 x i8] + +define void @PR15608() { +bb: + br label %bb3 + +bb1: ; No predecessors! + br i1 icmp ult (i64 xor (i64 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i64), i64 1), i64 1), label %bb2, label %bb3 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1, %bb + br i1 xor (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1), i1 trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1)), label %bb7, label %bb4 + +bb4: ; preds = %bb6, %bb3 + %tmp = phi i1 [ true, %bb6 ], [ trunc (i192 lshr (i192 or (i192 and (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 -340282366920938463463374607431768211457), i192 shl (i192 zext (i1 trunc (i192 lshr (i192 or (i192 shl (i192 zext (i64 trunc (i128 lshr (i128 trunc (i384 lshr (i384 or (i384 shl (i384 zext (i64 ptrtoint ([2 x i8]* @global to i64) to i384), i384 192), i384 425269881901436522087161771558896140289), i384 128) to i128), i128 64) to i64) to i192), i192 64), i192 1), i192 128) to i1) to i192), i192 128)), i192 128) to i1), %bb3 ] + br i1 false, label %bb8, label %bb5 + +bb5: ; preds = %bb4 + br i1 %tmp, label %bb8, label %bb6 + +bb6: ; preds = %bb5 + br i1 false, label %bb8, label %bb4 + +bb7: ; preds = %bb3 + unreachable + +bb8: ; preds = %bb6, %bb5, %bb4 + unreachable +} diff --git a/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll new file mode 100644 index 0000000000..03b6bdeafa --- /dev/null +++ b/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx-i -show-mc-encoding + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@b = external global [8 x float], align 32 +@e = external global [8 x float], align 16 + +define void @main() #0 { +entry: + %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32, !tbaa !0 + %bitcast.i = extractelement <8 x float> %0, i32 0 + %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0 + %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1 + %vecinit3.i.i = insertelement <4 x float> %vecinit2.i.i, float 0.000000e+00, i32 2 + %vecinit4.i.i = insertelement <4 x float> %vecinit3.i.i, float 0.000000e+00, i32 3 + %1 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vecinit4.i.i) #2 + %vecext.i.i = extractelement <4 x float> %1, i32 0 + store float %vecext.i.i, float* getelementptr inbounds ([8 x float]* @e, i64 0, i64 0), align 16, !tbaa !0 + unreachable +} + +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) #1 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind } + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/CodeGen/X86/DbgValueOtherTargets.test b/test/CodeGen/X86/DbgValueOtherTargets.test index c95e8c6abd..7b4d431c93 100644 --- a/test/CodeGen/X86/DbgValueOtherTargets.test +++ b/test/CodeGen/X86/DbgValueOtherTargets.test @@ -1,2 +1,2 @@ -; RUN: llc -O0 -march=x86 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll -; RUN: llc -O0 -march=x86-64 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=x86 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=x86-64 -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/X86/GC/erlang-gc.ll b/test/CodeGen/X86/GC/erlang-gc.ll new file mode 100644 index 0000000000..c55b7f6dcf --- /dev/null +++ b/test/CodeGen/X86/GC/erlang-gc.ll @@ -0,0 +1,25 @@ +; RUN: llc -mtriple=x86_64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK64 +; RUN: llc -mtriple=i686-linux-gnu < %s | FileCheck %s --check-prefix=CHECK32 + +define i32 @main(i32 %x) nounwind gc "erlang" { + %puts = tail call i32 @foo(i32 %x) + ret i32 0 + +; CHECK64: .section .note.gc,"",@progbits +; CHECK64-NEXT: .align 8 +; CHECK64-NEXT: .short 1 # safe point count +; CHECK64-NEXT: .long .Ltmp0 # safe point address +; CHECK64-NEXT: .short 1 # stack frame size (in words) +; CHECK64-NEXT: .short 0 # stack arity +; CHECK64-NEXT: .short 0 # live root count + +; CHECK32: .section .note.gc,"",@progbits +; CHECK32-NEXT: .align 4 +; CHECK32-NEXT: .short 1 # safe point count +; CHECK32-NEXT: .long .Ltmp0 # safe point address +; CHECK32-NEXT: .short 3 # stack frame size (in words) +; CHECK32-NEXT: .short 0 # stack arity +; CHECK32-NEXT: .short 0 # live root count +} + +declare i32 @foo(i32) diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll index cf5d520073..227ef3466e 100644 --- a/test/CodeGen/X86/MachineSink-DbgValue.ll +++ b/test/CodeGen/X86/MachineSink-DbgValue.ll @@ -26,12 +26,10 @@ bb2: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!18 = metadata !{metadata !1} -!19 = metadata !{metadata !6, metadata !7, metadata !10} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -40,10 +38,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !8 = metadata !{i32 786447, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] !9 = metadata !{i32 786468, metadata !0, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] -!11 = metadata !{i32 786443, metadata !1, i32 2, i32 25, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ] !12 = metadata !{i32 2, i32 13, metadata !1, null} !13 = metadata !{i32 2, i32 22, metadata !1, null} !14 = metadata !{i32 3, i32 14, metadata !11, null} !15 = metadata !{i32 4, i32 3, metadata !11, null} !16 = metadata !{i32 5, i32 5, metadata !11, null} !17 = metadata !{i32 7, i32 1, metadata !11, null} +!18 = metadata !{metadata !1} +!19 = metadata !{metadata !6, metadata !7, metadata !10} +!20 = metadata !{metadata !"a.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll index 33141680aa..1329200c3e 100644 --- a/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll +++ b/test/CodeGen/X86/MachineSink-PHIUse.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink" define fastcc void @t() nounwind ssp { diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll index fbe8879ad6..bb227a0185 100644 --- a/test/CodeGen/X86/MergeConsecutiveStores.ll +++ b/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -337,3 +337,99 @@ block4: ; preds = %4, %.lr.ph ret void } +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy. +; CHECK: MergeLoadStoreBaseIndexOffset +; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK: movw [[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %11, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %10, %1 ] + %.0 = phi i64* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i64* %.0, i64 1 + %3 = load i64* %.0, align 1 + %4 = getelementptr inbounds i8* %c, i64 %3 + %5 = load i8* %4, align 1 + %6 = add i64 %3, 1 + %7 = getelementptr inbounds i8* %c, i64 %6 + %8 = load i8* %7, align 1 + store i8 %5, i8* %.08, align 1 + %9 = getelementptr inbounds i8* %.08, i64 1 + store i8 %8, i8* %9, align 1 + %10 = getelementptr inbounds i8* %.08, i64 2 + %11 = add nsw i32 %.09, -1 + %12 = icmp eq i32 %11, 0 + br i1 %12, label %13, label %1 + +; <label>:13 + ret void +} + +; Make sure that we merge the consecutive load/store sequence below and use a +; word (16 bit) instead of a byte copy even if there are intermediate sign +; extensions. +; CHECK: MergeLoadStoreBaseIndexOffsetSext +; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK: movw [[REG]], (%{{.*}}) +define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i64 + %5 = getelementptr inbounds i8* %c, i64 %4 + %6 = load i8* %5, align 1 + %7 = add i64 %4, 1 + %8 = getelementptr inbounds i8* %c, i64 %7 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i64 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i64 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} + +; However, we can only merge ignore sign extensions when they are on all memory +; computations; +; CHECK: loadStoreBaseIndexOffsetSextNoSex +; CHECK-NOT: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] +; CHECK-NOT: movw [[REG]], (%{{.*}}) +define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) { + br label %1 + +; <label>:1 + %.09 = phi i32 [ %n, %0 ], [ %12, %1 ] + %.08 = phi i8* [ %b, %0 ], [ %11, %1 ] + %.0 = phi i8* [ %a, %0 ], [ %2, %1 ] + %2 = getelementptr inbounds i8* %.0, i64 1 + %3 = load i8* %.0, align 1 + %4 = sext i8 %3 to i64 + %5 = getelementptr inbounds i8* %c, i64 %4 + %6 = load i8* %5, align 1 + %7 = add i8 %3, 1 + %wrap.4 = sext i8 %7 to i64 + %8 = getelementptr inbounds i8* %c, i64 %wrap.4 + %9 = load i8* %8, align 1 + store i8 %6, i8* %.08, align 1 + %10 = getelementptr inbounds i8* %.08, i64 1 + store i8 %9, i8* %10, align 1 + %11 = getelementptr inbounds i8* %.08, i64 2 + %12 = add nsw i32 %.09, -1 + %13 = icmp eq i32 %12, 0 + br i1 %13, label %14, label %1 + +; <label>:14 + ret void +} diff --git a/test/CodeGen/X86/Stats/lit.local.cfg b/test/CodeGen/X86/Stats/lit.local.cfg deleted file mode 100644 index 1a5fd5ec86..0000000000 --- a/test/CodeGen/X86/Stats/lit.local.cfg +++ /dev/null @@ -1,8 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -targets = set(config.root.targets_to_build.split()) -if not 'X86' in targets: - config.unsupported = True - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll new file mode 100644 index 0000000000..4192f13d89 --- /dev/null +++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll @@ -0,0 +1,204 @@ +; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=atom 2>&1 | \ +; RUN: grep "calll" | not grep "(" +; RUN: llc < %s -mtriple=i386-linux-gnu -mcpu=core2 2>&1 | \ +; RUN: grep "calll" | grep "4-byte Folded Reload" + +%struct.targettype = type {i32} +%struct.op_ptr1 = type opaque +%struct.op_ptr2 = type opaque +%union.anon = type { [8 x i32], [48 x i8] } +%struct.const1 = type { [64 x i16], i8 } +%struct.const2 = type { [17 x i8], [256 x i8], i8 } + +%struct.ref1 = type { void (%struct.ref2*)*, i32 (%struct.ref2*)*, void (%struct.ref2*)*, i32 (%struct.ref2*, i8***)*, %struct.op_ptr2** } +%struct.ref2 = type { %struct.localref13*, %struct.localref15*, %struct.localref12*, i8*, i8, i32, %struct.localref11*, i32, i32, i32, i32, i32, i32, i32, double, i8, i8, i32, i8, i8, i8, i32, i8, i32, i8, i8, i8, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.const1*], [4 x %struct.const2*], [4 x %struct.const2*], i32, %struct.ref3*, i8, i8, [16 x i8], [16 x i8], [16 x i8], i32, i8, i8, i8, i8, i16, i16, i8, i8, i8, %struct.localref10*, i32, i32, i32, i32, i8*, i32, [4 x %struct.ref3*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.localref8*, %struct.localref9*, %struct.ref1*, %struct.localref7*, %struct.localref6*, %struct.localref5*, %struct.localref1*, %struct.ref4*, %struct.localref2*, %struct.localref3*, %struct.localref4* } +%struct.ref3 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, i32, %struct.const1*, i8* } +%struct.ref4 = type { void (%struct.ref2*)*, [5 x void (%struct.ref2*, %struct.ref3*, i16*, i8**, i32)*] } + +%struct.localref1 = type { void (%struct.ref2*)*, i8 (%struct.ref2*, [64 x i16]**)*, i8 } +%struct.localref2 = type { void (%struct.ref2*)*, void (%struct.ref2*, i8***, i32*, i32, i8**, i32*, i32)*, i8 } +%struct.localref3 = type { void (%struct.ref2*)*, void (%struct.ref2*, i8***, i32, i8**, i32)* } +%struct.localref4 = type { {}*, void (%struct.ref2*, i8**, i8**, i32)*, void (%struct.ref2*)*, void (%struct.ref2*)* } +%struct.localref5 = type { void (%struct.ref2*)*, i32 (%struct.ref2*)*, i8 (%struct.ref2*)*, i8, i8, i32, i32 } +%struct.localref6 = type { i32 (%struct.ref2*)*, void (%struct.ref2*)*, void (%struct.ref2*)*, void (%struct.ref2*)*, i8, i8 } +%struct.localref7 = type { void (%struct.ref2*, i32)*, void (%struct.ref2*, i8***, i32*, i32, i8**, i32*, i32)* } +%struct.localref8 = type { void (%struct.ref2*)*, void (%struct.ref2*)*, i8 } +%struct.localref9 = type { void (%struct.ref2*, i32)*, void (%struct.ref2*, i8**, i32*, i32)* } +%struct.localref10 = type { %struct.localref10*, i8, i32, i32, i8* } +%struct.localref11 = type { i8*, %struct.targettype, void (%struct.ref2*)*, i8 (%struct.ref2*)*, void (%struct.ref2*, %struct.targettype)*, i8 (%struct.ref2*, i32)*, void (%struct.ref2*)* } +%struct.localref12 = type { {}*, %struct.targettype, %struct.targettype, i32, i32 } +%struct.localref13 = type { void (%struct.localref14*)*, void (%struct.localref14*, i32)*, void (%struct.localref14*)*, void (%struct.localref14*, i8*)*, void (%struct.localref14*)*, i32, %union.anon, i32, %struct.targettype, i8**, i32, i8**, i32, i32 } +%struct.localref14 = type { %struct.localref13*, %struct.localref15*, %struct.localref12*, i8*, i8, i32 } +%struct.localref15 = type { i8* (%struct.localref14*, i32, %struct.targettype)*, i8* (%struct.localref14*, i32, %struct.targettype)*, i8** (%struct.localref14*, i32, i32, i32)*, [64 x i16]** (%struct.localref14*, i32, i32, i32)*, %struct.op_ptr1* (%struct.localref14*, i32, i8, i32, i32, i32)*, %struct.op_ptr2* (%struct.localref14*, i32, i8, i32, i32, i32)*, {}*, i8** (%struct.localref14*, %struct.op_ptr1*, i32, i32, i8)*, [64 x i16]** (%struct.localref14*, %struct.op_ptr2*, i32, i32, i8)*, void (%struct.localref14*, i32)*, {}*, %struct.targettype, %struct.targettype} + +define internal i32 @foldedreload(%struct.ref2* %cinfo, i8*** nocapture %output1) { + %1 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 79 + %2 = load %struct.ref1** %1, align 4 + %3 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 68 + %4 = load i32* %3, align 4 + %5 = add i32 %4, -1 + %6 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 64 + %7 = load i32* %6, align 4 + %8 = add i32 %7, -1 + %9 = getelementptr inbounds %struct.ref1* %2, i32 1, i32 1 + %10 = bitcast i32 (%struct.ref2*)** %9 to i32* + %11 = load i32* %10, align 4 + %12 = getelementptr inbounds %struct.ref1* %2, i32 1, i32 2 + %13 = bitcast void (%struct.ref2*)** %12 to i32* + %14 = load i32* %13, align 4 + %15 = icmp slt i32 %11, %14 + br i1 %15, label %.lr.ph18, label %._crit_edge19 + +.lr.ph18: + %16 = getelementptr inbounds %struct.ref1* %2, i32 1 + %17 = bitcast %struct.ref1* %16 to i32* + %18 = getelementptr inbounds %struct.ref1* %16, i32 0, i32 0 + %19 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 66 + %20 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 84 + %21 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 36 + %22 = getelementptr inbounds %struct.ref1* %2, i32 1, i32 3 + %23 = bitcast i32 (%struct.ref2*, i8***)** %22 to [10 x [64 x i16]*]* + %.pre = load i32* %17, align 4 + br label %24 + +; <label>:24 + %25 = phi i32 [ %14, %.lr.ph18 ], [ %89, %88 ] + %26 = phi i32 [ %.pre, %.lr.ph18 ], [ 0, %88 ] + %var1.015 = phi i32 [ %11, %.lr.ph18 ], [ %90, %88 ] + %27 = icmp ugt i32 %26, %5 + br i1 %27, label %88, label %.preheader7.lr.ph + +.preheader7.lr.ph: + %.pre24 = load i32* %19, align 4 + br label %.preheader7 + +.preheader7: + %28 = phi i32 [ %.pre24, %.preheader7.lr.ph ], [ %85, %._crit_edge11 ] + %var2.012 = phi i32 [ %26, %.preheader7.lr.ph ], [ %86, %._crit_edge11 ] + %29 = icmp sgt i32 %28, 0 + br i1 %29, label %.lr.ph10, label %._crit_edge11 + +.lr.ph10: + %30 = phi i32 [ %28, %.preheader7 ], [ %82, %81 ] + %var4.09 = phi i32 [ 0, %.preheader7 ], [ %var4.1.lcssa, %81 ] + %ci.08 = phi i32 [ 0, %.preheader7 ], [ %83, %81 ] + %31 = getelementptr inbounds %struct.ref2* %cinfo, i32 0, i32 67, i32 %ci.08 + %32 = load %struct.ref3** %31, align 4 + %33 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 1 + %34 = load i32* %33, align 4 + %35 = load %struct.ref4** %20, align 4 + %36 = getelementptr inbounds %struct.ref4* %35, i32 0, i32 1, i32 %34 + %37 = load void (%struct.ref2*, %struct.ref3*, i16*, i8**, i32)** %36, align 4 + %38 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 17 + %39 = load i32* %38, align 4 + %40 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 9 + %41 = load i32* %40, align 4 + %42 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 16 + %43 = load i32* %42, align 4 + %44 = mul i32 %43, %var2.012 + %45 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 14 + %46 = load i32* %45, align 4 + %47 = icmp sgt i32 %46, 0 + br i1 %47, label %.lr.ph6, label %81 + +.lr.ph6: + %48 = getelementptr inbounds i8*** %output1, i32 %34 + %49 = mul nsw i32 %41, %var1.015 + %50 = load i8*** %48, align 4 + %51 = getelementptr inbounds i8** %50, i32 %49 + %52 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 13 + %53 = getelementptr inbounds %struct.ref3* %32, i32 0, i32 18 + %54 = icmp sgt i32 %39, 0 + br i1 %54, label %.lr.ph6.split.us, label %.lr.ph6..lr.ph6.split_crit_edge + +.lr.ph6..lr.ph6.split_crit_edge: + br label %._crit_edge26 + +.lr.ph6.split.us: + %55 = phi i32 [ %63, %._crit_edge28 ], [ %46, %.lr.ph6 ] + %56 = phi i32 [ %64, %._crit_edge28 ], [ %41, %.lr.ph6 ] + %var4.15.us = phi i32 [ %66, %._crit_edge28 ], [ %var4.09, %.lr.ph6 ] + %output2.04.us = phi i8** [ %69, %._crit_edge28 ], [ %51, %.lr.ph6 ] + %var5.03.us = phi i32 [ %67, %._crit_edge28 ], [ 0, %.lr.ph6 ] + %57 = load i32* %21, align 4 + %58 = icmp ult i32 %57, %8 + br i1 %58, label %.lr.ph.us, label %59 + +; <label>:59 + %60 = add nsw i32 %var5.03.us, %var1.015 + %61 = load i32* %53, align 4 + %62 = icmp slt i32 %60, %61 + br i1 %62, label %.lr.ph.us, label %._crit_edge27 + +._crit_edge27: + %63 = phi i32 [ %.pre23.pre, %.loopexit.us ], [ %55, %59 ] + %64 = phi i32 [ %74, %.loopexit.us ], [ %56, %59 ] + %65 = load i32* %52, align 4 + %66 = add nsw i32 %65, %var4.15.us + %67 = add nsw i32 %var5.03.us, 1 + %68 = icmp slt i32 %67, %63 + br i1 %68, label %._crit_edge28, label %._crit_edge + +._crit_edge28: + %69 = getelementptr inbounds i8** %output2.04.us, i32 %64 + br label %.lr.ph6.split.us + +.lr.ph.us: + %var3.02.us = phi i32 [ %75, %.lr.ph.us ], [ %44, %.lr.ph6.split.us ], [ %44, %59 ] + %xindex.01.us = phi i32 [ %76, %.lr.ph.us ], [ 0, %.lr.ph6.split.us ], [ 0, %59 ] + %70 = add nsw i32 %xindex.01.us, %var4.15.us + %71 = getelementptr inbounds [10 x [64 x i16]*]* %23, i32 0, i32 %70 + %72 = load [64 x i16]** %71, align 4 + %73 = getelementptr inbounds [64 x i16]* %72, i32 0, i32 0 + tail call void %37(%struct.ref2* %cinfo, %struct.ref3* %32, i16* %73, i8** %output2.04.us, i32 %var3.02.us) nounwind + %74 = load i32* %40, align 4 + %75 = add i32 %74, %var3.02.us + %76 = add nsw i32 %xindex.01.us, 1 + %exitcond = icmp eq i32 %76, %39 + br i1 %exitcond, label %.loopexit.us, label %.lr.ph.us + +.loopexit.us: + %.pre23.pre = load i32* %45, align 4 + br label %._crit_edge27 + +._crit_edge26: + %var4.15 = phi i32 [ %var4.09, %.lr.ph6..lr.ph6.split_crit_edge ], [ %78, %._crit_edge26 ] + %var5.03 = phi i32 [ 0, %.lr.ph6..lr.ph6.split_crit_edge ], [ %79, %._crit_edge26 ] + %77 = load i32* %52, align 4 + %78 = add nsw i32 %77, %var4.15 + %79 = add nsw i32 %var5.03, 1 + %80 = icmp slt i32 %79, %46 + br i1 %80, label %._crit_edge26, label %._crit_edge + +._crit_edge: + %split = phi i32 [ %66, %._crit_edge27 ], [ %78, %._crit_edge26 ] + %.pre25 = load i32* %19, align 4 + br label %81 + +; <label>:81 + %82 = phi i32 [ %.pre25, %._crit_edge ], [ %30, %.lr.ph10 ] + %var4.1.lcssa = phi i32 [ %split, %._crit_edge ], [ %var4.09, %.lr.ph10 ] + %83 = add nsw i32 %ci.08, 1 + %84 = icmp slt i32 %83, %82 + br i1 %84, label %.lr.ph10, label %._crit_edge11 + +._crit_edge11: + %85 = phi i32 [ %28, %.preheader7 ], [ %82, %81 ] + %86 = add i32 %var2.012, 1 + %87 = icmp ugt i32 %86, %5 + br i1 %87, label %._crit_edge14, label %.preheader7 + +._crit_edge14: + %.pre21 = load i32* %13, align 4 + br label %88 + +; <label>:88 + %89 = phi i32 [ %.pre21, %._crit_edge14 ], [ %25, %24 ] + store void (%struct.ref2*)* null, void (%struct.ref2*)** %18, align 4 + %90 = add nsw i32 %var1.015, 1 + %91 = icmp slt i32 %90, %89 + br i1 %91, label %24, label %._crit_edge19 + +._crit_edge19: + ret i32 3 +} diff --git a/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll new file mode 100644 index 0000000000..a7f249aae3 --- /dev/null +++ b/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll @@ -0,0 +1,213 @@ +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom 2>&1 | \ +; RUN: grep "callq" | not grep "(" +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 2>&1 | \ +; RUN: grep "callq" | grep "8-byte Folded Reload" + +%struct.targettype = type {i64} +%struct.op_ptr1 = type opaque +%struct.op_ptr2 = type opaque +%union.anon = type { [8 x i32], [48 x i8] } +%struct.const1 = type { [64 x i16], i8 } +%struct.const2 = type { [17 x i8], [256 x i8], i8 } +%struct.coef1 = type { %struct.ref1, i32, i32, i32, [10 x [64 x i16]*] } + +%struct.ref1 = type { void (%struct.ref2*)*, i32 (%struct.ref2*)*, void (%struct.ref2*)*, i32 (%struct.ref2*, i8***)*, %struct.op_ptr2** } +%struct.ref2 = type { %struct.localref13*, %struct.localref15*, %struct.localref12*, i8*, i8, i32, %struct.localref11*, i32, i32, i32, i32, i32, i32, i32, double, i8, i8, i32, i8, i8, i8, i32, i8, i32, i8, i8, i8, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %struct.const1*], [4 x %struct.const2*], [4 x %struct.const2*], i32, %struct.ref3*, i8, i8, [16 x i8], [16 x i8], [16 x i8], i32, i8, i8, i8, i8, i16, i16, i8, i8, i8, %struct.localref10*, i32, i32, i32, i32, i8*, i32, [4 x %struct.ref3*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %struct.localref8*, %struct.localref9*, %struct.ref1*, %struct.localref7*, %struct.localref6*, %struct.localref5*, %struct.localref1*, %struct.ref4*, %struct.localref2*, %struct.localref3*, %struct.localref4* } +%struct.ref3 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, i32, %struct.const1*, i8* } +%struct.ref4 = type { void (%struct.ref2*)*, [5 x void (%struct.ref2*, %struct.ref3*, i16*, i8**, i32)*] } + +%struct.localref1 = type { void (%struct.ref2*)*, i8 (%struct.ref2*, [64 x i16]**)*, i8 } +%struct.localref2 = type { void (%struct.ref2*)*, void (%struct.ref2*, i8***, i32*, i32, i8**, i32*, i32)*, i8 } +%struct.localref3 = type { void (%struct.ref2*)*, void (%struct.ref2*, i8***, i32, i8**, i32)* } +%struct.localref4 = type { {}*, void (%struct.ref2*, i8**, i8**, i32)*, void (%struct.ref2*)*, void (%struct.ref2*)* } +%struct.localref5 = type { void (%struct.ref2*)*, i32 (%struct.ref2*)*, i8 (%struct.ref2*)*, i8, i8, i32, i32 } +%struct.localref6 = type { i32 (%struct.ref2*)*, void (%struct.ref2*)*, void (%struct.ref2*)*, void (%struct.ref2*)*, i8, i8 } +%struct.localref7 = type { void (%struct.ref2*, i32)*, void (%struct.ref2*, i8***, i32*, i32, i8**, i32*, i32)* } +%struct.localref8 = type { void (%struct.ref2*)*, void (%struct.ref2*)*, i8 } +%struct.localref9 = type { void (%struct.ref2*, i32)*, void (%struct.ref2*, i8**, i32*, i32)* } +%struct.localref10 = type { %struct.localref10*, i8, i32, i32, i8* } +%struct.localref11 = type { i8*, %struct.targettype, void (%struct.ref2*)*, i8 (%struct.ref2*)*, void (%struct.ref2*, %struct.targettype)*, i8 (%struct.ref2*, i32)*, void (%struct.ref2*)* } +%struct.localref12 = type { {}*, %struct.targettype, %struct.targettype, i32, i32 } +%struct.localref13 = type { void (%struct.localref14*)*, void (%struct.localref14*, i32)*, void (%struct.localref14*)*, void (%struct.localref14*, i8*)*, void (%struct.localref14*)*, i32, %union.anon, i32, %struct.targettype, i8**, i32, i8**, i32, i32 } +%struct.localref14 = type { %struct.localref13*, %struct.localref15*, %struct.localref12*, i8*, i8, i32 } +%struct.localref15 = type { i8* (%struct.localref14*, i32, %struct.targettype)*, i8* (%struct.localref14*, i32, %struct.targettype)*, i8** (%struct.localref14*, i32, i32, i32)*, [64 x i16]** (%struct.localref14*, i32, i32, i32)*, %struct.op_ptr1* (%struct.localref14*, i32, i8, i32, i32, i32)*, %struct.op_ptr2* (%struct.localref14*, i32, i8, i32, i32, i32)*, {}*, i8** (%struct.localref14*, %struct.op_ptr1*, i32, i32, i8)*, [64 x i16]** (%struct.localref14*, %struct.op_ptr2*, i32, i32, i8)*, void (%struct.localref14*, i32)*, {}*, %struct.targettype, %struct.targettype} + +define internal i32 @foldedreload(%struct.ref2* %cinfo, i8*** nocapture %output1) { + %1 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 79 + %2 = load %struct.ref1** %1, align 8 + %3 = bitcast %struct.ref1* %2 to %struct.coef1* + %4 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 68 + %5 = load i32* %4, align 4 + %6 = add i32 %5, -1 + %7 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 64 + %8 = load i32* %7, align 4 + %9 = add i32 %8, -1 + %10 = getelementptr inbounds %struct.coef1* %3, i64 0, i32 2 + %11 = load i32* %10, align 4 + %12 = getelementptr inbounds %struct.ref1* %2, i64 1, i32 1 + %13 = bitcast i32 (%struct.ref2*)** %12 to i32* + %14 = load i32* %13, align 4 + %15 = icmp slt i32 %11, %14 + br i1 %15, label %.lr.ph18, label %._crit_edge19 + +.lr.ph18: + %16 = getelementptr inbounds %struct.ref1* %2, i64 1 + %17 = bitcast %struct.ref1* %16 to i32* + %18 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 66 + %19 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 84 + %20 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 36 + %21 = getelementptr inbounds %struct.ref1* %2, i64 1, i32 2 + %22 = bitcast void (%struct.ref2*)** %21 to [10 x [64 x i16]*]* + %.pre = load i32* %17, align 4 + br label %23 + +; <label>:23 + %24 = phi i32 [ %14, %.lr.ph18 ], [ %92, %91 ] + %25 = phi i32 [ %.pre, %.lr.ph18 ], [ 0, %91 ] + %var1.015 = phi i32 [ %11, %.lr.ph18 ], [ %93, %91 ] + %26 = icmp ugt i32 %25, %6 + br i1 %26, label %91, label %.preheader7.lr.ph + +.preheader7.lr.ph: + %.pre24 = load i32* %18, align 4 + br label %.preheader7 + +.preheader7: + %27 = phi i32 [ %.pre24, %.preheader7.lr.ph ], [ %88, %._crit_edge11 ] + %var2.012 = phi i32 [ %25, %.preheader7.lr.ph ], [ %89, %._crit_edge11 ] + %28 = icmp sgt i32 %27, 0 + br i1 %28, label %.lr.ph10, label %._crit_edge11 + +.lr.ph10: + %29 = phi i32 [ %27, %.preheader7 ], [ %85, %84 ] + %indvars.iv21 = phi i64 [ 0, %.preheader7 ], [ %indvars.iv.next22, %84 ] + %var4.09 = phi i32 [ 0, %.preheader7 ], [ %var4.1.lcssa, %84 ] + %30 = getelementptr inbounds %struct.ref2* %cinfo, i64 0, i32 67, i64 %indvars.iv21 + %31 = load %struct.ref3** %30, align 8 + %32 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 1 + %33 = load i32* %32, align 4 + %34 = sext i32 %33 to i64 + %35 = load %struct.ref4** %19, align 8 + %36 = getelementptr inbounds %struct.ref4* %35, i64 0, i32 1, i64 %34 + %37 = load void (%struct.ref2*, %struct.ref3*, i16*, i8**, i32)** %36, align 8 + %38 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 17 + %39 = load i32* %38, align 4 + %40 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 9 + %41 = load i32* %40, align 4 + %42 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 16 + %43 = load i32* %42, align 4 + %44 = mul i32 %43, %var2.012 + %45 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 14 + %46 = load i32* %45, align 4 + %47 = icmp sgt i32 %46, 0 + br i1 %47, label %.lr.ph6, label %84 + +.lr.ph6: + %48 = mul nsw i32 %41, %var1.015 + %49 = getelementptr inbounds i8*** %output1, i64 %34 + %50 = sext i32 %48 to i64 + %51 = load i8*** %49, align 8 + %52 = getelementptr inbounds i8** %51, i64 %50 + %53 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 13 + %54 = getelementptr inbounds %struct.ref3* %31, i64 0, i32 18 + %55 = icmp sgt i32 %39, 0 + br i1 %55, label %.lr.ph6.split.us, label %.lr.ph6..lr.ph6.split_crit_edge + +.lr.ph6..lr.ph6.split_crit_edge: + br label %._crit_edge28 + +.lr.ph6.split.us: + %56 = phi i32 [ %64, %._crit_edge30 ], [ %46, %.lr.ph6 ] + %57 = phi i32 [ %65, %._crit_edge30 ], [ %41, %.lr.ph6 ] + %var4.15.us = phi i32 [ %67, %._crit_edge30 ], [ %var4.09, %.lr.ph6 ] + %output2.04.us = phi i8** [ %71, %._crit_edge30 ], [ %52, %.lr.ph6 ] + %var5.03.us = phi i32 [ %68, %._crit_edge30 ], [ 0, %.lr.ph6 ] + %58 = load i32* %20, align 4 + %59 = icmp ult i32 %58, %9 + br i1 %59, label %.lr.ph.us, label %60 + +; <label>:60 + %61 = add nsw i32 %var5.03.us, %var1.015 + %62 = load i32* %54, align 4 + %63 = icmp slt i32 %61, %62 + br i1 %63, label %.lr.ph.us, label %._crit_edge29 + +._crit_edge29: + %64 = phi i32 [ %.pre25.pre, %.loopexit.us ], [ %56, %60 ] + %65 = phi i32 [ %77, %.loopexit.us ], [ %57, %60 ] + %66 = load i32* %53, align 4 + %67 = add nsw i32 %66, %var4.15.us + %68 = add nsw i32 %var5.03.us, 1 + %69 = icmp slt i32 %68, %64 + br i1 %69, label %._crit_edge30, label %._crit_edge + +._crit_edge30: + %70 = sext i32 %65 to i64 + %71 = getelementptr inbounds i8** %output2.04.us, i64 %70 + br label %.lr.ph6.split.us + +; <label>:72 + %indvars.iv = phi i64 [ 0, %.lr.ph.us ], [ %indvars.iv.next, %72 ] + %var3.02.us = phi i32 [ %44, %.lr.ph.us ], [ %78, %72 ] + %73 = add nsw i64 %indvars.iv, %79 + %74 = getelementptr inbounds [10 x [64 x i16]*]* %22, i64 0, i64 %73 + %75 = load [64 x i16]** %74, align 8 + %76 = getelementptr inbounds [64 x i16]* %75, i64 0, i64 0 + tail call void %37(%struct.ref2* %cinfo, %struct.ref3* %31, i16* %76, i8** %output2.04.us, i32 %var3.02.us) nounwind + %77 = load i32* %40, align 4 + %78 = add i32 %77, %var3.02.us + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %39 + br i1 %exitcond, label %.loopexit.us, label %72 + +.loopexit.us: + %.pre25.pre = load i32* %45, align 4 + br label %._crit_edge29 + +.lr.ph.us: + %79 = sext i32 %var4.15.us to i64 + br label %72 + +._crit_edge28: + %var4.15 = phi i32 [ %var4.09, %.lr.ph6..lr.ph6.split_crit_edge ], [ %81, %._crit_edge28 ] + %var5.03 = phi i32 [ 0, %.lr.ph6..lr.ph6.split_crit_edge ], [ %82, %._crit_edge28 ] + %80 = load i32* %53, align 4 + %81 = add nsw i32 %80, %var4.15 + %82 = add nsw i32 %var5.03, 1 + %83 = icmp slt i32 %82, %46 + br i1 %83, label %._crit_edge28, label %._crit_edge + +._crit_edge: + %split = phi i32 [ %67, %._crit_edge29 ], [ %81, %._crit_edge28 ] + %.pre27 = load i32* %18, align 4 + br label %84 + +; <label>:84 + %85 = phi i32 [ %.pre27, %._crit_edge ], [ %29, %.lr.ph10 ] + %var4.1.lcssa = phi i32 [ %split, %._crit_edge ], [ %var4.09, %.lr.ph10 ] + %indvars.iv.next22 = add i64 %indvars.iv21, 1 + %86 = trunc i64 %indvars.iv.next22 to i32 + %87 = icmp slt i32 %86, %85 + br i1 %87, label %.lr.ph10, label %._crit_edge11 + +._crit_edge11: + %88 = phi i32 [ %27, %.preheader7 ], [ %85, %84 ] + %89 = add i32 %var2.012, 1 + %90 = icmp ugt i32 %89, %6 + br i1 %90, label %._crit_edge14, label %.preheader7 + +._crit_edge14: + %.pre23 = load i32* %13, align 4 + br label %91 + +; <label>:91 + %92 = phi i32 [ %.pre23, %._crit_edge14 ], [ %24, %23 ] + store i32 0, i32* %17, align 4 + %93 = add nsw i32 %var1.015, 1 + %94 = icmp slt i32 %93, %92 + br i1 %94, label %23, label %._crit_edge19 + +._crit_edge19: + ret i32 3 +} diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll new file mode 100644 index 0000000000..632781130d --- /dev/null +++ b/test/CodeGen/X86/atom-call-reg-indirect.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s +; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s +; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s +; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s + + +; fn_ptr.ll +%class.A = type { i32 (...)** } + +define i32 @test1() #0 { + ;ATOM: test1 +entry: + %call = tail call %class.A* @_Z3facv() + %0 = bitcast %class.A* %call to void (%class.A*)*** + %vtable = load void (%class.A*)*** %0, align 8 + %1 = load void (%class.A*)** %vtable, align 8 + ;ATOM32: movl (%ecx), %ecx + ;ATOM32: calll *%ecx + ;ATOM-NOT32: calll *(%ecx) + ;ATOM64: movq (%rcx), %rcx + ;ATOM64: callq *%rcx + ;ATOM-NOT64: callq *(%rcx) + tail call void %1(%class.A* %call) + ret i32 0 +} + +declare %class.A* @_Z3facv() #1 + +; virt_fn.ll +@p = external global void (i32)** + +define i32 @test2() #0 { + ;ATOM: test2 +entry: + %0 = load void (i32)*** @p, align 8 + %1 = load void (i32)** %0, align 8 + ;ATOM32: movl (%eax), %eax + ;ATOM32: calll *%eax + ;ATOM-NOT: calll *(%eax) + ;ATOM64: movq (%rax), %rax + ;ATOM64: callq *%rax + ;ATOM-NOT64: callq *(%rax) + tail call void %1(i32 2) + ret i32 0 +} diff --git a/test/CodeGen/X86/atomic32.ll b/test/CodeGen/X86/atomic32.ll index 3ea96bf169..080828702e 100644 --- a/test/CodeGen/X86/atomic32.ll +++ b/test/CodeGen/X86/atomic32.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X32 ; RUN: llc < %s -O0 -march=x86 -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefix NOCMOV -; XFAIL: cygwin,mingw32,win32 +; XFAIL: cygwin,mingw32 @sc32 = external global i32 diff --git a/test/CodeGen/X86/atomic64.ll b/test/CodeGen/X86/atomic64.ll index d362c31e6d..05939e8fc6 100644 --- a/test/CodeGen/X86/atomic64.ll +++ b/test/CodeGen/X86/atomic64.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -O0 -march=x86-64 -mcpu=corei7 -verify-machineinstrs | FileCheck %s --check-prefix X64 -; XFAIL: cygwin,mingw32,win32 +; XFAIL: cygwin,mingw32 @sc64 = external global i64 diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll index 62bdea2b49..22fad7ce4b 100644 --- a/test/CodeGen/X86/avx-cvt.ll +++ b/test/CodeGen/X86/avx-cvt.ll @@ -18,6 +18,12 @@ define <4 x double> @sitofp01(<4 x i32> %a) { ret <4 x double> %b } +; CHECK: vcvtdq2ps %ymm +define <8 x float> @sitofp02(<8 x i16> %a) { + %b = sitofp <8 x i16> %a to <8 x float> + ret <8 x float> %b +} + ; CHECK: vcvttpd2dqy %ymm define <4 x i32> @fptosi01(<4 x double> %a) { %b = fptosi <4 x double> %a to <4 x i32> diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 77a7c4f945..a6775aba09 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -55,7 +55,7 @@ define void @storev16i16(<16 x i16> %a) nounwind { ; CHECK: storev16i16_01 ; CHECK: vextractf128 -; CHECK: vmovaps %xmm +; CHECK: vmovups %xmm define void @storev16i16_01(<16 x i16> %a) nounwind { store <16 x i16> %a, <16 x i16>* undef, align 4 unreachable @@ -81,7 +81,7 @@ define void @storev32i8_01(<32 x i8> %a) nounwind { ; CHECK: _double_save ; CHECK-NOT: vinsertf128 $1 ; CHECK-NOT: vinsertf128 $0 -; CHECK: vmovups %xmm +; CHECK: vmovaps %xmm ; CHECK: vmovaps %xmm define void @double_save(<4 x i32> %A, <4 x i32> %B, <8 x i32>* %P) nounwind ssp { entry: @@ -114,3 +114,38 @@ cif_mixed_test_any_check: ; preds = %cif_mask_mixed unreachable } +; CHECK: add8i32 +; CHECK: vmovups +; CHECK: vmovups +; CHECK-NOT: vinsertf128 +; CHECK-NOT: vextractf128 +; CHECK: vmovups +; CHECK: vmovups +define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { + %b = load <8 x i32>* %bp, align 1 + %x = add <8 x i32> zeroinitializer, %b + store <8 x i32> %x, <8 x i32>* %ret, align 1 + ret void +} + +; CHECK: add4i64a64 +; CHECK: vmovaps ({{.*}}), %ymm{{.*}} +; CHECK: vmovaps %ymm{{.*}}, ({{.*}}) +define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { + %b = load <4 x i64>* %bp, align 64 + %x = add <4 x i64> zeroinitializer, %b + store <4 x i64> %x, <4 x i64>* %ret, align 64 + ret void +} + +; CHECK: add4i64a16 +; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}} +; CHECK: vmovaps {{.*}}({{.*}}), %xmm{{.*}} +; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}}) +; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}}) +define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind { + %b = load <4 x i64>* %bp, align 16 + %x = add <4 x i64> zeroinitializer, %b + store <4 x i64> %x, <4 x i64>* %ret, align 16 + ret void +} diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll index 7ae0d36c08..b9c7000510 100755 --- a/test/CodeGen/X86/avx-sext.ll +++ b/test/CodeGen/X86/avx-sext.ll @@ -165,3 +165,24 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { ret <4 x i64> %extmask } +; AVX: sext_4i8_to_4i64 +; AVX: vpmovsxbd +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { + %X = load <4 x i8>* %ptr + %Y = sext <4 x i8> %X to <4 x i64> + ret <4 x i64>%Y +} + +; AVX: sext_4i16_to_4i64 +; AVX: vpmovsxwd +; AVX: vpmovsxdq +; AVX: vpmovsxdq +; AVX: ret +define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { + %X = load <4 x i16>* %ptr + %Y = sext <4 x i16> %X to <4 x i64> + ret <4 x i64>%Y +} diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll index 09eb5d1038..fa6f6e85e9 100644 --- a/test/CodeGen/X86/bool-simplify.ll +++ b/test/CodeGen/X86/bool-simplify.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand | FileCheck %s +; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx,+rdrand,+rdseed | FileCheck %s define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) @@ -39,7 +39,22 @@ define i32 @bax(<2 x i64> %c) { ; CHECK: ret } -define i32 @rnd(i32 %arg) nounwind uwtable { +define i16 @rnd16(i16 %arg) nounwind uwtable { + %1 = tail call { i16, i32 } @llvm.x86.rdrand.16() nounwind + %2 = extractvalue { i16, i32 } %1, 0 + %3 = extractvalue { i16, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i16 0, i16 %arg + %6 = add i16 %5, %2 + ret i16 %6 +; CHECK: rnd16 +; CHECK: rdrand +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i32 @rnd32(i32 %arg) nounwind uwtable { %1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind %2 = extractvalue { i32, i32 } %1, 0 %3 = extractvalue { i32, i32 } %1, 1 @@ -47,12 +62,77 @@ define i32 @rnd(i32 %arg) nounwind uwtable { %5 = select i1 %4, i32 0, i32 %arg %6 = add i32 %5, %2 ret i32 %6 -; CHECK: rnd +; CHECK: rnd32 ; CHECK: rdrand ; CHECK: cmov ; CHECK-NOT: cmov ; CHECK: ret } +define i64 @rnd64(i64 %arg) nounwind uwtable { + %1 = tail call { i64, i32 } @llvm.x86.rdrand.64() nounwind + %2 = extractvalue { i64, i32 } %1, 0 + %3 = extractvalue { i64, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i64 0, i64 %arg + %6 = add i64 %5, %2 + ret i64 %6 +; CHECK: rnd64 +; CHECK: rdrand +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i16 @seed16(i16 %arg) nounwind uwtable { + %1 = tail call { i16, i32 } @llvm.x86.rdseed.16() nounwind + %2 = extractvalue { i16, i32 } %1, 0 + %3 = extractvalue { i16, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i16 0, i16 %arg + %6 = add i16 %5, %2 + ret i16 %6 +; CHECK: seed16 +; CHECK: rdseed +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i32 @seed32(i32 %arg) nounwind uwtable { + %1 = tail call { i32, i32 } @llvm.x86.rdseed.32() nounwind + %2 = extractvalue { i32, i32 } %1, 0 + %3 = extractvalue { i32, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i32 0, i32 %arg + %6 = add i32 %5, %2 + ret i32 %6 +; CHECK: seed32 +; CHECK: rdseed +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + +define i64 @seed64(i64 %arg) nounwind uwtable { + %1 = tail call { i64, i32 } @llvm.x86.rdseed.64() nounwind + %2 = extractvalue { i64, i32 } %1, 0 + %3 = extractvalue { i64, i32 } %1, 1 + %4 = icmp eq i32 %3, 0 + %5 = select i1 %4, i64 0, i64 %arg + %6 = add i64 %5, %2 + ret i64 %6 +; CHECK: seed64 +; CHECK: rdseed +; CHECK: cmov +; CHECK-NOT: cmov +; CHECK: ret +} + declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone +declare { i16, i32 } @llvm.x86.rdrand.16() nounwind declare { i32, i32 } @llvm.x86.rdrand.32() nounwind +declare { i64, i32 } @llvm.x86.rdrand.64() nounwind +declare { i16, i32 } @llvm.x86.rdseed.16() nounwind +declare { i32, i32 } @llvm.x86.rdseed.32() nounwind +declare { i64, i32 } @llvm.x86.rdseed.64() nounwind diff --git a/test/CodeGen/X86/complex-fca.ll b/test/CodeGen/X86/complex-fca.ll index 7e7acaa98a..8ad38a4ee5 100644 --- a/test/CodeGen/X86/complex-fca.ll +++ b/test/CodeGen/X86/complex-fca.ll @@ -1,5 +1,8 @@ ; RUN: llc < %s -march=x86 | grep mov | count 2 +; Skip this on Windows as there is no ccosl and sret behaves differently. +; XFAIL: pc-win32 + define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 } %z) nounwind { entry: %z8 = extractvalue { x86_fp80, x86_fp80 } %z, 0 diff --git a/test/CodeGen/X86/Stats/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll index 4be14d2128..4a0110896c 100644 --- a/test/CodeGen/X86/Stats/constant-pool-remat-0.ll +++ b/test/CodeGen/X86/constant-pool-remat-0.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s diff --git a/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll index 064ee364d1..74a7240c81 100644 --- a/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll +++ b/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS ; STATS: 9 asm-printer diff --git a/test/CodeGen/X86/Stats/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll index af69531246..75d3d93ddb 100644 --- a/test/CodeGen/X86/Stats/dagcombine-cse.ll +++ b/test/CodeGen/X86/dagcombine-cse.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14 define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { diff --git a/test/CodeGen/X86/dagcombine_unsafe_math.ll b/test/CodeGen/X86/dagcombine_unsafe_math.ll index a3221dea5b..592cf1bec2 100644 --- a/test/CodeGen/X86/dagcombine_unsafe_math.ll +++ b/test/CodeGen/X86/dagcombine_unsafe_math.ll @@ -40,3 +40,17 @@ define float @test4(float %x, float %y) { ; CHECK: test4 ; CHECK: vaddss } + +; rdar://13445387 +; "x + x + x => 3.0 * x" should be disabled after legalization because +; Instruction-Selection dosen't know how to handle "3.0" +; +define float @test5() { + %mul.i.i151 = fmul <4 x float> zeroinitializer, zeroinitializer + %vecext.i8.i152 = extractelement <4 x float> %mul.i.i151, i32 1 + %vecext1.i9.i153 = extractelement <4 x float> %mul.i.i151, i32 0 + %add.i10.i154 = fadd float %vecext1.i9.i153, %vecext.i8.i152 + %vecext.i7.i155 = extractelement <4 x float> %mul.i.i151, i32 2 + %add.i.i156 = fadd float %vecext.i7.i155, %add.i10.i154 + ret float %add.i.i156 +} diff --git a/test/CodeGen/X86/dbg-byval-parameter.ll b/test/CodeGen/X86/dbg-byval-parameter.ll index 25c946bbb6..aca06a27a1 100644 --- a/test/CodeGen/X86/dbg-byval-parameter.ll +++ b/test/CodeGen/X86/dbg-byval-parameter.ll @@ -26,23 +26,24 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} -!18 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"b2.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"b2.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !18, null} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !7} -!6 = metadata !{i32 786468, metadata !2, metadata !"double", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 786451, metadata !2, metadata !"Rect", metadata !2, i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9, metadata !14} -!9 = metadata !{i32 786445, metadata !7, metadata !"P1", metadata !2, i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786451, metadata !2, metadata !"Pt", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] !11 = metadata !{metadata !12, metadata !13} -!12 = metadata !{i32 786445, metadata !10, metadata !"x", metadata !2, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] -!13 = metadata !{i32 786445, metadata !10, metadata !"y", metadata !2, i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] -!14 = metadata !{i32 786445, metadata !7, metadata !"P2", metadata !2, i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] +!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] !15 = metadata !{i32 11, i32 0, metadata !1, null} !16 = metadata !{i32 12, i32 0, metadata !17, null} -!17 = metadata !{i32 786443, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] +!17 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] +!18 = metadata !{metadata !1} +!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"} diff --git a/test/CodeGen/X86/dbg-const-int.ll b/test/CodeGen/X86/dbg-const-int.ll index 1f9c43ad9a..aabc206806 100644 --- a/test/CodeGen/X86/dbg-const-int.ll +++ b/test/CodeGen/X86/dbg-const-int.ll @@ -13,17 +13,18 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!11 = metadata !{metadata !1} -!12 = metadata !{metadata !6} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 132191)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] +!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, null, null, metadata !11, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] -!7 = metadata !{i32 786443, metadata !1, i32 1, i32 11, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 42} !9 = metadata !{i32 2, i32 12, metadata !7, null} !10 = metadata !{i32 3, i32 2, metadata !7, null} +!11 = metadata !{metadata !1} +!12 = metadata !{metadata !6} +!13 = metadata !{metadata !"a.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/dbg-const.ll b/test/CodeGen/X86/dbg-const.ll index cd60e533cc..a9b8f1fdc4 100644 --- a/test/CodeGen/X86/dbg-const.ll +++ b/test/CodeGen/X86/dbg-const.ll @@ -17,19 +17,20 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare i32 @bar() nounwind readnone !llvm.dbg.cu = !{!2} -!13 = metadata !{metadata !0} -!14 = metadata !{metadata !6} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} -!1 = metadata !{i32 786473, metadata !"mu.c", metadata !"/private/tmp", metadata !2} -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"mu.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 114183)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !13, null} +!0 = metadata !{i32 786478, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, null, null, metadata !13, null, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} !6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null} -!7 = metadata !{i32 786443, metadata !0, i32 12, i32 52, metadata !1, i32 0} +!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 42} !9 = metadata !{i32 15, i32 12, metadata !7, null} !10 = metadata !{i32 23, i32 3, metadata !7, null} !11 = metadata !{i32 17, i32 3, metadata !7, null} !12 = metadata !{i32 18, i32 3, metadata !7, null} +!13 = metadata !{metadata !0} +!14 = metadata !{metadata !6} +!15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/dbg-declare-arg.ll b/test/CodeGen/X86/dbg-declare-arg.ll index 957f37bd87..f7e0c91cdf 100644 --- a/test/CodeGen/X86/dbg-declare-arg.ll +++ b/test/CodeGen/X86/dbg-declare-arg.ll @@ -70,34 +70,33 @@ entry: } !llvm.dbg.cu = !{!2} -!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null} ; [ DW_TAG_subprogram ] !1 = metadata !{i32 589826, metadata !2, metadata !"A", metadata !3, i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ] -!2 = metadata !{i32 786449, i32 0, i32 4, metadata !"a.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 130127)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786473, metadata !"a.cc", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, null, null, metadata !50, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ] !4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14} !5 = metadata !{i32 786445, metadata !3, metadata !"x", metadata !3, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !6 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786445, metadata !3, metadata !"y", metadata !3, i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ] !8 = metadata !{i32 786445, metadata !3, metadata !"z", metadata !3, i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ] !9 = metadata !{i32 786445, metadata !3, metadata !"o", metadata !3, i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ] -!10 = metadata !{i32 786478, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] !11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{null, metadata !13} !13 = metadata !{i32 786447, metadata !2, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] -!14 = metadata !{i32 786478, i32 0, metadata !1, metadata !"A", metadata !"A", metadata !"", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] +!14 = metadata !{i32 786478, metadata !"", i32 0, metadata !1, metadata !"A", metadata !"A", metadata !3, i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null} ; [ DW_TAG_subprogram ] !15 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !16 = metadata !{null, metadata !13, metadata !17} !17 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ] !18 = metadata !{i32 786470, metadata !2, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ] -!19 = metadata !{i32 786478, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ] +!19 = metadata !{i32 786478, metadata !"_Z3fooi", i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !3, i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null} ; [ DW_TAG_subprogram ] !20 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !21 = metadata !{metadata !1} -!22 = metadata !{i32 786478, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ] +!22 = metadata !{i32 786478, metadata !"_ZN1AD1Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null} ; [ DW_TAG_subprogram ] !23 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !24 = metadata !{null} -!25 = metadata !{i32 786478, i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ] +!25 = metadata !{i32 786478, metadata !"_ZN1AD2Ev", i32 0, metadata !3, metadata !"~A", metadata !"~A", metadata !3, i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null} ; [ DW_TAG_subprogram ] !26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !27 = metadata !{i32 4, i32 11, metadata !19, null} !28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -122,3 +121,5 @@ entry: !47 = metadata !{i32 2, i32 47, metadata !25, null} !48 = metadata !{i32 2, i32 54, metadata !49, null} !49 = metadata !{i32 786443, metadata !25, i32 2, i32 52, metadata !3, i32 2} ; [ DW_TAG_lexical_block ] +!50 = metadata !{metadata !0, metadata !10, metadata !14, metadata !19, metadata !22, metadata !25} +!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"} diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll index 9d13de16da..6ac397ac42 100644 --- a/test/CodeGen/X86/dbg-declare.ll +++ b/test/CodeGen/X86/dbg-declare.ll @@ -29,10 +29,10 @@ declare void @llvm.stackrestore(i8*) nounwind !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !10} diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll index 0a526100e6..1bd3d77522 100644 --- a/test/CodeGen/X86/dbg-file-name.ll +++ b/test/CodeGen/X86/dbg-file-name.ll @@ -10,11 +10,12 @@ define i32 @main() nounwind { } !llvm.dbg.cu = !{!2} -!9 = metadata !{metadata !6} -!1 = metadata !{i32 786473, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"simple.c", metadata !"/Users/manav/one/two", metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ] !5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !5} +!9 = metadata !{metadata !6} +!10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"} diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll index 725b5b73d5..17d645757d 100644 --- a/test/CodeGen/X86/dbg-i128-const.ll +++ b/test/CodeGen/X86/dbg-i128-const.ll @@ -13,17 +13,19 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!5} -!12 = metadata !{metadata !3} !0 = metadata !{i128 42 } !1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] -!2 = metadata !{i32 786443, metadata !3, i32 26, i32 0, metadata !4, i32 0} ; [ DW_TAG_lexical_block ] -!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ] -!4 = metadata !{i32 786473, metadata !"foo.c", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 786449, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !12, null} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 786453, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!2 = metadata !{i32 786443, metadata !4, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!3 = metadata !{i32 786478, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786449, i32 1, metadata !4, metadata !"clang", i1 true, metadata !"", i32 0, null, null, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] !7 = metadata !{metadata !8, metadata !8, metadata !8} -!8 = metadata !{i32 786454, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] -!9 = metadata !{i32 786473, metadata !"myint.h", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ] -!10 = metadata !{i32 786468, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] +!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !11 = metadata !{i32 29, i32 0, metadata !2, null} +!12 = metadata !{metadata !3} +!13 = metadata !{metadata !"foo.c", metadata !"/tmp"} +!14 = metadata !{metadata !"myint.h", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-large-unsigned-const.ll b/test/CodeGen/X86/dbg-large-unsigned-const.ll index 9a767621d9..ff16318efc 100644 --- a/test/CodeGen/X86/dbg-large-unsigned-const.ll +++ b/test/CodeGen/X86/dbg-large-unsigned-const.ll @@ -30,13 +30,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !30 = metadata !{metadata !7, metadata !11} !31 = metadata !{metadata !12} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"lli.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 135593)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !29, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !"_Z3iseRKxS0_", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786449, i32 4, metadata !2, metadata !"clang version 3.0 (trunk 135593)", i1 true, metadata !"", i32 0, null, null, metadata !29, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !"_Z3iseRKxS0_", i32 0, metadata !2, metadata !"ise", metadata !"ise", metadata !2, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64*, i64*)* @_Z3iseRKxS0_, null, null, metadata !30, i32 2} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !"lli.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !"_Z2fnx", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !"_Z2fnx", i32 0, metadata !2, metadata !"fn", metadata !"fn", metadata !2, i32 6, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (i64)* @_Z2fnx, null, null, metadata !31, i32 6} ; [ DW_TAG_subprogram ] !7 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !8 = metadata !{i32 786448, metadata !0, null, null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] !9 = metadata !{i32 786470, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] @@ -46,14 +46,14 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !13 = metadata !{i32 2, i32 27, metadata !1, null} !14 = metadata !{i32 2, i32 49, metadata !1, null} !15 = metadata !{i32 3, i32 3, metadata !16, null} -!16 = metadata !{i32 786443, metadata !1, i32 2, i32 54, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786443, metadata !2, metadata !1, i32 2, i32 54, i32 0} ; [ DW_TAG_lexical_block ] !17 = metadata !{metadata !"long long", metadata !18} !18 = metadata !{metadata !"omnipotent char", metadata !19} !19 = metadata !{metadata !"Simple C/C++ TBAA", null} !20 = metadata !{i32 6, i32 19, metadata !6, null} !21 = metadata !{i32 786689, metadata !1, metadata !"LHS", metadata !2, i32 16777218, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] !22 = metadata !{i32 7, i32 10, metadata !23, null} -!23 = metadata !{i32 786443, metadata !6, i32 6, i32 22, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!23 = metadata !{i32 786443, metadata !2, metadata !6, i32 6, i32 22, i32 1} ; [ DW_TAG_lexical_block ] !24 = metadata !{i32 2, i32 27, metadata !1, metadata !22} !25 = metadata !{i64 9223372036854775807} !26 = metadata !{i32 786689, metadata !1, metadata !"RHS", metadata !2, i32 33554434, metadata !8, i32 0, metadata !22} ; [ DW_TAG_arg_variable ] diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll index 4e5078a31b..baad6c0b60 100644 --- a/test/CodeGen/X86/dbg-merge-loc-entry.ll +++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll @@ -44,33 +44,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone !llvm.dbg.cu = !{!2} -!28 = metadata !{metadata !0, metadata !9} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"foobar.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"foobar.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !28, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", metadata !1, i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !28, null, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8} -!5 = metadata !{i32 786454, metadata !6, metadata !"UTItype", metadata !6, i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] -!6 = metadata !{i32 786473, metadata !"foobar.h", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786468, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 786447, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] -!9 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ] -!10 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ] +!9 = metadata !{i32 786478, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", metadata !1, i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] !11 = metadata !{metadata !12, metadata !12, metadata !12} -!12 = metadata !{i32 786454, metadata !6, metadata !"TItype", metadata !6, i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] -!13 = metadata !{i32 786468, metadata !1, metadata !"", metadata !1, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] +!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ] !15 = metadata !{i32 1093, i32 0, metadata !9, null} !16 = metadata !{i64 0} !17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ] -!18 = metadata !{i32 786443, metadata !9, i32 1094, i32 0, metadata !1, i32 13} ; [ DW_TAG_lexical_block ] -!19 = metadata !{i32 786454, metadata !6, metadata !"word_type", metadata !6, i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] -!20 = metadata !{i32 786468, metadata !1, metadata !"long int", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!18 = metadata !{i32 786443, metadata !1, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] +!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !21 = metadata !{i32 1095, i32 0, metadata !18, null} !22 = metadata !{i32 1103, i32 0, metadata !18, null} !23 = metadata !{i32 1104, i32 0, metadata !18, null} !24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26} -!25 = metadata !{i32 786443, metadata !0, i32 879, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!25 = metadata !{i32 786443, metadata !1, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !26 = metadata !{i32 1107, i32 0, metadata !18, null} !27 = metadata !{i32 1111, i32 0, metadata !18, null} +!28 = metadata !{metadata !0, metadata !9} +!29 = metadata !{metadata !"foobar.c", metadata !"/tmp"} +!30 = metadata !{metadata !"foobar.h", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-prolog-end.ll b/test/CodeGen/X86/dbg-prolog-end.ll index 868017cd21..26bac2e082 100644 --- a/test/CodeGen/X86/dbg-prolog-end.ll +++ b/test/CodeGen/X86/dbg-prolog-end.ll @@ -35,21 +35,21 @@ entry: !llvm.dbg.cu = !{!0} !18 = metadata !{metadata !1, metadata !6} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 131100)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !18, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!0 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, null, null, metadata !18, null, metadata !""} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !"/tmp/a.c", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !2, metadata !"main", metadata !"main", metadata !"", metadata !2, i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] !7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !8 = metadata !{i32 1, i32 13, metadata !1, null} !9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] -!10 = metadata !{i32 786443, metadata !1, i32 1, i32 16, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 2, i32 6, metadata !10, null} !12 = metadata !{i32 2, i32 11, metadata !10, null} !13 = metadata !{i32 3, i32 2, metadata !10, null} !14 = metadata !{i32 4, i32 2, metadata !10, null} !15 = metadata !{i32 5, i32 2, metadata !10, null} !16 = metadata !{i32 8, i32 2, metadata !17, null} -!17 = metadata !{i32 786443, metadata !6, i32 7, i32 12, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!17 = metadata !{i32 786443, metadata !2, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ] diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll index 2f590ee8cc..6090185dc1 100644 --- a/test/CodeGen/X86/dbg-subrange.ll +++ b/test/CodeGen/X86/dbg-subrange.ll @@ -14,10 +14,10 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null} diff --git a/test/CodeGen/X86/dbg-value-dag-combine.ll b/test/CodeGen/X86/dbg-value-dag-combine.ll index 0066cd0498..fcbf64f423 100644 --- a/test/CodeGen/X86/dbg-value-dag-combine.ll +++ b/test/CodeGen/X86/dbg-value-dag-combine.ll @@ -24,13 +24,10 @@ entry: ret void, !dbg !17 } !llvm.dbg.cu = !{!2} -!18 = metadata !{metadata !0} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata -!"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata -!"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !18, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null, metadata !5} !5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] @@ -46,4 +43,5 @@ entry: !15 = metadata !{i32 5, i32 15, metadata !10, null} !16 = metadata !{i32 6, i32 18, metadata !10, null} !17 = metadata !{i32 7, i32 1, metadata !0, null} - +!18 = metadata !{metadata !0} +!19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"} diff --git a/test/CodeGen/X86/dbg-value-isel.ll b/test/CodeGen/X86/dbg-value-isel.ll index e0dc9a4798..55be3b1f22 100644 --- a/test/CodeGen/X86/dbg-value-isel.ll +++ b/test/CodeGen/X86/dbg-value-isel.ll @@ -79,16 +79,15 @@ declare <4 x i32> @__amdil_get_global_id_int() nounwind declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!19 = metadata !{metadata !0} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"OCLlLwTXZ.cl", metadata !"/tmp", metadata !"clc", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, null, null, metadata !19, null, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null, metadata !5} -!5 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] -!6 = metadata !{i32 589846, metadata !2, metadata !"uint", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] -!7 = metadata !{i32 786468, metadata !2, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] +!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] +!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !9 = metadata !{i32 1, i32 32, metadata !0, null} !10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ] @@ -100,4 +99,5 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !16 = metadata !{i32 7, i32 26, metadata !11, null} !17 = metadata !{i32 9, i32 24, metadata !11, null} !18 = metadata !{i32 10, i32 1, metadata !0, null} - +!19 = metadata !{metadata !0} +!20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll index 5ad8370618..2a1916f26c 100644 --- a/test/CodeGen/X86/dbg-value-location.ll +++ b/test/CodeGen/X86/dbg-value-location.ll @@ -46,26 +46,29 @@ declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"/tmp/f.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"f.i", metadata !"/tmp", metadata !"clang version 2.9 (trunk 124753)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", metadata !1, i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", metadata !1, i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ] -!8 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar3} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32)* @bar2} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i32*)* @bar} ; [ DW_TAG_subprogram ] !9 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !10, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !10 = metadata !{metadata !11} !11 = metadata !{i32 786468, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] !12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !13 = metadata !{i32 19509, i32 20, metadata !0, null} !14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17} -!15 = metadata !{i32 786443, metadata !16, i32 18086, i32 1, metadata !1, i32 748} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", metadata !1, i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null} ; [ DW_TAG_subprogram ] !17 = metadata !{i32 19514, i32 2, metadata !18, null} -!18 = metadata !{i32 786443, metadata !0, i32 19510, i32 1, metadata !1, i32 99} ; [ DW_TAG_lexical_block ] +!18 = metadata !{i32 786443, metadata !1, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ] !22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17} !23 = metadata !{i32 19524, i32 1, metadata !18, null} +!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8} +!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ] +!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"} +!27 = metadata !{metadata !"f.i", metadata !"/tmp"} diff --git a/test/CodeGen/X86/dbg-value-range.ll b/test/CodeGen/X86/dbg-value-range.ll index 2b26af93b6..6766dbe9ed 100644 --- a/test/CodeGen/X86/dbg-value-range.ll +++ b/test/CodeGen/X86/dbg-value-range.ll @@ -18,12 +18,10 @@ declare i32 @foo(...) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!20 = metadata !{metadata !0} -!21 = metadata !{metadata !6, metadata !11} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"bar.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 122997)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, null, null, metadata !20, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -33,7 +31,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !9 = metadata !{metadata !10} !10 = metadata !{i32 786445, metadata !1, metadata !"c", metadata !1, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] !11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ] -!12 = metadata !{i32 786443, metadata !0, i32 5, i32 22, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ] !13 = metadata !{i32 5, i32 19, metadata !0, null} !14 = metadata !{i32 6, i32 14, metadata !12, null} !15 = metadata !{metadata !"int", metadata !16} @@ -41,6 +39,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !17 = metadata !{metadata !"Simple C/C++ TBAA", null} !18 = metadata !{i32 7, i32 2, metadata !12, null} !19 = metadata !{i32 8, i32 2, metadata !12, null} +!20 = metadata !{metadata !0} +!21 = metadata !{metadata !6, metadata !11} +!22 = metadata !{metadata !"bar.c", metadata !"/private/tmp"} ; Check that variable bar:b value range is appropriately trucated in debug info. ; The variable is in %rdi which is clobbered by 'movl %ebx, %edi' diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll index 63e6167bcb..3bc2957963 100644 --- a/test/CodeGen/X86/dwarf-comp-dir.ll +++ b/test/CodeGen/X86/dwarf-comp-dir.ll @@ -6,9 +6,11 @@ target triple = "x86_64-unknown-linux-gnu" !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"empty.c", metadata !"/home/nlewycky", metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} +!3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ] +!4 = metadata !{metadata !"empty.c", metadata !"/home/nlewycky"} ; The important part of the following check is that dir = #0. ; Dir Mod Time File Len File Name diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll index 4995baa3e5..45a2b384de 100644 --- a/test/CodeGen/X86/fast-isel-args-fail.ll +++ b/test/CodeGen/X86/fast-isel-args-fail.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-apple-darwin10 +; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win32 | FileCheck %s ; Requires: Asserts ; Previously, this would cause an assert. @@ -8,3 +9,12 @@ entry: %add1 = add nsw i31 %add, %c ret i31 %add1 } + +; We don't handle the Windows CC, yet. +define i32 @foo(i32* %p) { +entry: +; CHECK: foo +; CHECK: movl (%rcx), %eax + %0 = load i32* %p, align 4 + ret i32 %0 +} diff --git a/test/CodeGen/X86/fast-isel-expect.ll b/test/CodeGen/X86/fast-isel-expect.ll index 1f53348bd0..c4be7f364f 100644 --- a/test/CodeGen/X86/fast-isel-expect.ll +++ b/test/CodeGen/X86/fast-isel-expect.ll @@ -5,7 +5,7 @@ target triple = "x86_64-unknown-linux-gnu" @glbl = extern_weak constant i8 -declare i64 @llvm.expect.i64(i64, i64) #0 +declare i64 @llvm.expect.i64(i64, i64) define void @test() { ; CHECK: movl $glbl @@ -19,5 +19,3 @@ bb1: bb2: unreachable } - -attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/X86/Stats/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll index 74ecd045b3..34191e3f9a 100644 --- a/test/CodeGen/X86/Stats/hoist-invariant-load.ll +++ b/test/CodeGen/X86/hoist-invariant-load.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/test/CodeGen/X86/Stats/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll index c3f991d7a9..66074fb368 100644 --- a/test/CodeGen/X86/Stats/licm-nested.ll +++ b/test/CodeGen/X86/licm-nested.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3 ; MachineLICM should be able to hoist the symbolic addresses out of diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg index a8ad0f1a28..9d285bf4e2 100644 --- a/test/CodeGen/X86/lit.local.cfg +++ b/test/CodeGen/X86/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'X86' in targets: diff --git a/test/CodeGen/X86/misched-ilp.ll b/test/CodeGen/X86/misched-ilp.ll index c6cedb7be8..4ca296ca92 100644 --- a/test/CodeGen/X86/misched-ilp.ll +++ b/test/CodeGen/X86/misched-ilp.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s -; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=nocona -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s ; ; Basic verification of the ScheduleDAGILP metric. ; diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll index 65ee7b1d8e..bb42734833 100644 --- a/test/CodeGen/X86/movgs.ll +++ b/test/CodeGen/X86/movgs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=sse41 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse41 | FileCheck %s --check-prefix=X64 define i32 @test1() nounwind readonly { entry: diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll index 9f7d036cf1..29b9f34464 100644 --- a/test/CodeGen/X86/multiple-loop-post-inc.ll +++ b/test/CodeGen/X86/multiple-loop-post-inc.ll @@ -1,4 +1,4 @@ -; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s +; RUN: llc -asm-verbose=false -disable-branch-fold -disable-block-placement -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s ; rdar://7236213 ; ; The scheduler's 2-address hack has been disabled, so there is diff --git a/test/CodeGen/X86/Stats/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll index 476bb10998..6425ef0e83 100644 --- a/test/CodeGen/X86/Stats/phi-immediate-factoring.ll +++ b/test/CodeGen/X86/phi-immediate-factoring.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 ; PR1296 diff --git a/test/CodeGen/X86/pr15296.ll b/test/CodeGen/X86/pr15296.ll new file mode 100644 index 0000000000..1187d80cdf --- /dev/null +++ b/test/CodeGen/X86/pr15296.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7-avx | FileCheck %s + +define <8 x i32> @shiftInput___vyuunu(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind { +allocas: + %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0 + %smear.1 = insertelement <8 x i32> %smear.0, i32 %shiftval, i32 1 + %smear.2 = insertelement <8 x i32> %smear.1, i32 %shiftval, i32 2 + %smear.3 = insertelement <8 x i32> %smear.2, i32 %shiftval, i32 3 + %smear.4 = insertelement <8 x i32> %smear.3, i32 %shiftval, i32 4 + %smear.5 = insertelement <8 x i32> %smear.4, i32 %shiftval, i32 5 + %smear.6 = insertelement <8 x i32> %smear.5, i32 %shiftval, i32 6 + %smear.7 = insertelement <8 x i32> %smear.6, i32 %shiftval, i32 7 + %bitop = lshr <8 x i32> %input, %smear.7 + ret <8 x i32> %bitop +} + +; CHECK: shiftInput___vyuunu +; CHECK: psrld +; CHECK: psrld +; CHECK: ret + +define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i32> %__mask) nounwind { +allocas: + %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0 + %smear.7 = shufflevector <8 x i32> %smear.0, <8 x i32> undef, <8 x i32> zeroinitializer + %bitop = lshr <8 x i32> %input, %smear.7 + ret <8 x i32> %bitop +} + +; CHECK: shiftInput___canonical +; CHECK: psrld +; CHECK: psrld +; CHECK: ret + +define <4 x i64> @shiftInput___64in32bitmode(<4 x i64> %input, i64 %shiftval, <4 x i64> %__mask) nounwind { +allocas: + %smear.0 = insertelement <4 x i64> undef, i64 %shiftval, i32 0 + %smear.7 = shufflevector <4 x i64> %smear.0, <4 x i64> undef, <4 x i32> zeroinitializer + %bitop = lshr <4 x i64> %input, %smear.7 + ret <4 x i64> %bitop +} + +; CHECK: shiftInput___64in32bitmode +; CHECK: psrlq +; CHECK: psrlq +; CHECK: ret diff --git a/test/CodeGen/X86/pr15309.ll b/test/CodeGen/X86/pr15309.ll new file mode 100644 index 0000000000..6dbbc72a7b --- /dev/null +++ b/test/CodeGen/X86/pr15309.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=i686-linux-pc -mcpu=corei7 | FileCheck %s + +define void @test_convert_float2_ulong2(<2 x i64>* nocapture %src, <2 x float>* nocapture %dest) noinline { +L.entry: + %0 = getelementptr <2 x i64>* %src, i32 10 + %1 = load <2 x i64>* %0, align 16 + %2 = uitofp <2 x i64> %1 to <2 x float> + %3 = getelementptr <2 x float>* %dest, i32 10 + store <2 x float> %2, <2 x float>* %3, align 8 + ret void +} + +; CHECK: test_convert_float2_ulong2 +; CHECK-NOT: cvtpd2ps +; CHECK: ret diff --git a/test/CodeGen/X86/Stats/pr3522.ll b/test/CodeGen/X86/pr3522.ll index d8f37781fc..9f8dc03706 100644 --- a/test/CodeGen/X86/Stats/pr3522.ll +++ b/test/CodeGen/X86/pr3522.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk" ; PR3522 diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll index ec2f302b14..efb51913c5 100644 --- a/test/CodeGen/X86/prefetch.ll +++ b/test/CodeGen/X86/prefetch.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s ; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW ; rdar://10538297 @@ -9,10 +10,12 @@ entry: ; CHECK: prefetcht1 ; CHECK: prefetcht0 ; CHECK: prefetchnta +; PRFCHW: prefetchw tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) ret void } diff --git a/test/CodeGen/X86/rdseed.ll b/test/CodeGen/X86/rdseed.ll new file mode 100644 index 0000000000..35de7ebf74 --- /dev/null +++ b/test/CodeGen/X86/rdseed.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdseed | FileCheck %s + +declare {i16, i32} @llvm.x86.rdseed.16() +declare {i32, i32} @llvm.x86.rdseed.32() +declare {i64, i32} @llvm.x86.rdseed.64() + +define i32 @_rdseed16_step(i16* %random_val) { + %call = call {i16, i32} @llvm.x86.rdseed.16() + %randval = extractvalue {i16, i32} %call, 0 + store i16 %randval, i16* %random_val + %isvalid = extractvalue {i16, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdseed16_step: +; CHECK: rdseedw %ax +; CHECK: movw %ax, (%r[[A0:di|cx]]) +; CHECK: movzwl %ax, %ecx +; CHECK: movl $1, %eax +; CHECK: cmovael %ecx, %eax +; CHECK: ret +} + +define i32 @_rdseed32_step(i32* %random_val) { + %call = call {i32, i32} @llvm.x86.rdseed.32() + %randval = extractvalue {i32, i32} %call, 0 + store i32 %randval, i32* %random_val + %isvalid = extractvalue {i32, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdseed32_step: +; CHECK: rdseedl %e[[T0:[a-z]+]] +; CHECK: movl %e[[T0]], (%r[[A0]]) +; CHECK: movl $1, %eax +; CHECK: cmovael %e[[T0]], %eax +; CHECK: ret +} + +define i32 @_rdseed64_step(i64* %random_val) { + %call = call {i64, i32} @llvm.x86.rdseed.64() + %randval = extractvalue {i64, i32} %call, 0 + store i64 %randval, i64* %random_val + %isvalid = extractvalue {i64, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdseed64_step: +; CHECK: rdseedq %r[[T1:[a-z]+]] +; CHECK: movq %r[[T1]], (%r[[A0]]) +; CHECK: movl $1, %eax +; CHECK: cmovael %e[[T1]], %eax +; CHECK: ret +} diff --git a/test/CodeGen/X86/Stats/regpressure.ll b/test/CodeGen/X86/regpressure.ll index 52d7b56f18..1f756bee8a 100644 --- a/test/CodeGen/X86/Stats/regpressure.ll +++ b/test/CodeGen/X86/regpressure.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ;; Both functions in this testcase should codegen to the same function, and ;; neither of them should require spilling anything to the stack. diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll index 2af355905d..ceb79ea927 100644 --- a/test/CodeGen/X86/sibcall.ll +++ b/test/CodeGen/X86/sibcall.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=i686-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 +; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32 +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64 define void @t1(i32 %x) nounwind ssp { entry: diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll index 649cd61ab7..2aca5b897d 100644 --- a/test/CodeGen/X86/sink-hoist.ll +++ b/test/CodeGen/X86/sink-hoist.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true | FileCheck %s +; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s ; Currently, floating-point selects are lowered to CFG triangles. ; This means that one side of the select is always unconditionally diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll index e8a79bfa6e..8cdecd81bf 100644 --- a/test/CodeGen/X86/tls.ll +++ b/test/CodeGen/X86/tls.ll @@ -22,13 +22,13 @@ define i32 @f1() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax +; X32_WIN-NEXT: movl _i1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f1: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax +; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -49,13 +49,13 @@ define i32* @f2() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax +; X32_WIN-NEXT: leal _i1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f2: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax +; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax ; X64_WIN-NEXT: ret entry: @@ -75,13 +75,13 @@ define i32 @f3() nounwind { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax +; X32_WIN-NEXT: movl _i2@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f3: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax +; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -102,13 +102,13 @@ define i32* @f4() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax +; X32_WIN-NEXT: leal _i2@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f4: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax +; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax ; X64_WIN-NEXT: ret entry: @@ -126,13 +126,13 @@ define i32 @f5() nounwind { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax +; X32_WIN-NEXT: movl _i3@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f5: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax +; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -153,13 +153,13 @@ define i32* @f6() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax +; X32_WIN-NEXT: leal _i3@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f6: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax +; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax ; X64_WIN-NEXT: ret entry: @@ -234,14 +234,14 @@ define i16 @f11() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax +; X32_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax ; X32_WIN-NEXT: # kill ; X32_WIN-NEXT: ret ; X64_WIN: f11: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax +; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax ; X64_WIN-NEXT: # kill ; X64_WIN-NEXT: ret @@ -261,13 +261,13 @@ define i32 @f12() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax +; X32_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f12: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax +; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: @@ -287,13 +287,13 @@ define i8 @f13() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al +; X32_WIN-NEXT: movb _b1@SECREL32(%eax), %al ; X32_WIN-NEXT: ret ; X64_WIN: f13: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movb b1@SECREL(%rax), %al +; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al ; X64_WIN-NEXT: ret entry: @@ -312,13 +312,13 @@ define i32 @f14() { ; X32_WIN: movl __tls_index, %eax ; X32_WIN-NEXT: movl %fs:__tls_array, %ecx ; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax +; X32_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax ; X32_WIN-NEXT: ret ; X64_WIN: f14: ; X64_WIN: movl _tls_index(%rip), %eax ; X64_WIN-NEXT: movq %gs:88, %rcx ; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax +; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax ; X64_WIN-NEXT: ret entry: diff --git a/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll b/test/CodeGen/X86/twoaddr-coalesce-2.ll index af6d47af7a..cbcde06555 100644 --- a/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll +++ b/test/CodeGen/X86/twoaddr-coalesce-2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \ ; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted" ; rdar://6480363 diff --git a/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll b/test/CodeGen/X86/twoaddr-pass-sink.ll index 513c304e3b..9ca280627a 100644 --- a/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll +++ b/test/CodeGen/X86/twoaddr-pass-sink.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk" define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll index d3b4d31f6a..e02e3b5475 100644 --- a/test/CodeGen/X86/unknown-location.ll +++ b/test/CodeGen/X86/unknown-location.ll @@ -19,14 +19,15 @@ entry: } !llvm.dbg.cu = !{!3} -!9 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"test.c", metadata !"/dir", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !".", metadata !"producer", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6} !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 786443, metadata !1, i32 1, i32 30} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 4, i32 3, metadata !7, null} +!9 = metadata !{metadata !1} +!10 = metadata !{metadata !"test.c", metadata !"/dir"} diff --git a/test/CodeGen/X86/vec_align_i256.ll b/test/CodeGen/X86/vec_align_i256.ll new file mode 100644 index 0000000000..078bcb1544 --- /dev/null +++ b/test/CodeGen/X86/vec_align_i256.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mcpu=corei7-avx | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i686-apple-darwin8" + +; Make sure that we are not generating a movaps because the vector is aligned to 1. +;CHECK: @foo +;CHECK: xor +;CHECK-NEXT: vmovups +;CHECK-NEXT: ret +define void @foo() { + store <16 x i16> zeroinitializer, <16 x i16>* undef, align 1 + ret void +} diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll index e4a8f46cbc..863712ff48 100644 --- a/test/CodeGen/X86/vec_fpext.ll +++ b/test/CodeGen/X86/vec_fpext.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s -; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck --check-prefix=AVX %s +; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck --check-prefix=AVX %s ; PR11674 define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) { diff --git a/test/CodeGen/X86/Stats/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll index 2a4864a48a..4583e1925e 100644 --- a/test/CodeGen/X86/Stats/vec_insert-6.ll +++ b/test/CodeGen/X86/vec_insert-6.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 diff --git a/test/CodeGen/X86/Stats/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll index b26f920e5e..48db8de0d9 100644 --- a/test/CodeGen/X86/Stats/vec_shuffle-19.ll +++ b/test/CodeGen/X86/vec_shuffle-19.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 ; PR2485 diff --git a/test/CodeGen/X86/Stats/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll index b6b8ba6f84..5a2c444945 100644 --- a/test/CodeGen/X86/Stats/vec_shuffle-20.ll +++ b/test/CodeGen/X86/vec_shuffle-20.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll new file mode 100644 index 0000000000..d93f33ba0e --- /dev/null +++ b/test/CodeGen/X86/wide-fma-contraction.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +; CHECK: fmafunc +define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) { +; CHECK-NOT: vmulps +; CHECK-NOT: vaddps +; CHECK: vfmaddps +; CHECK-NOT: vmulps +; CHECK-NOT: vaddps +; CHECK: vfmaddps +; CHECK-NOT: vmulps +; CHECK-NOT: vaddps + %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c) + ret <16 x float> %ret +} + +declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone + + + diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll index 878c6db992..19cbfd45ed 100644 --- a/test/CodeGen/X86/win32_sret.ll +++ b/test/CodeGen/X86/win32_sret.ll @@ -1,28 +1,84 @@ -; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 -; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32 +; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86 ; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX -; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32 -; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32 +; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32 +; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86 ; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX ; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer ; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer ; arguments are caller-cleanup like normal arguments. -define void @sret1(i8* sret) nounwind { +define void @sret1(i8* sret %x) nounwind { entry: -; WIN_X32: {{ret$}} -; MINGW_X32: ret $4 +; WIN32: sret1 +; WIN32: movb $42, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret1 +; MINGW_X86: ret $4 + +; LINUX: sret1 ; LINUX: ret $4 + + store i8 42, i8* %x, align 4 ret void } -define void @sret2(i32* sret %x, i32 %y) nounwind { +define void @sret2(i8* sret %x, i8 %y) nounwind { entry: -; WIN_X32: {{ret$}} -; MINGW_X32: ret $4 +; WIN32: sret2 +; WIN32: movb {{.*}}, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret2 +; MINGW_X86: ret $4 + +; LINUX: sret2 ; LINUX: ret $4 - store i32 %y, i32* %x + + store i8 %y, i8* %x ret void } +define void @sret3(i8* sret %x, i8* %y) nounwind { +entry: +; WIN32: sret3 +; WIN32: movb $42, (%eax) +; WIN32-NOT: movb $13, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret3 +; MINGW_X86: ret $4 + +; LINUX: sret3 +; LINUX: ret $4 + + store i8 42, i8* %x + store i8 13, i8* %y + ret void +} + +; PR15556 +%struct.S4 = type { i32, i32, i32 } + +define void @sret4(%struct.S4* noalias sret %agg.result) { +entry: +; WIN32: sret4 +; WIN32: movl $42, (%eax) +; WIN32-NOT: popl %eax +; WIN32: {{ret$}} + +; MINGW_X86: sret4 +; MINGW_X86: ret $4 + +; LINUX: sret4 +; LINUX: ret $4 + + %x = getelementptr inbounds %struct.S4* %agg.result, i32 0, i32 0 + store i32 42, i32* %x, align 4 + ret void +} diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll index 7f8ae07463..14591248f3 100644 --- a/test/CodeGen/X86/win_ftol2.ll +++ b/test/CodeGen/X86/win_ftol2.ll @@ -63,9 +63,9 @@ define i64 @double_ui64_2(double %x, double %y, double %z) nounwind { %1 = fdiv double %x, %y %2 = fsub double %x, %z - %3 = fptoui double %1 to i64 - %4 = fptoui double %2 to i64 - %5 = sub i64 %3, %4 + %3 = fptoui double %2 to i64 + %4 = fptoui double %1 to i64 + %5 = sub i64 %4, %3 ret i64 %5 } @@ -121,9 +121,9 @@ define {double, i64} @double_ui64_4(double %x, double %y) nounwind { ; FTOL_2: calll __ftol2 ;; stack is %x - %1 = fptoui double %x to i64 - %2 = fptoui double %y to i64 - %3 = sub i64 %1, %2 + %1 = fptoui double %y to i64 + %2 = fptoui double %x to i64 + %3 = sub i64 %2, %1 %4 = insertvalue {double, i64} undef, double %x, 0 %5 = insertvalue {double, i64} %4, i64 %3, 1 ret {double, i64} %5 diff --git a/test/CodeGen/X86/xtest.ll b/test/CodeGen/X86/xtest.ll new file mode 100644 index 0000000000..e85565edcd --- /dev/null +++ b/test/CodeGen/X86/xtest.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=x86-64 -mattr=+rtm | FileCheck %s + +declare i32 @llvm.x86.xtest() nounwind + +define i32 @test_xtest() nounwind uwtable { +entry: + %0 = tail call i32 @llvm.x86.xtest() nounwind + ret i32 %0 +; CHECK: test_xtest +; CHECK: xtest +} diff --git a/test/CodeGen/X86/Stats/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll index 4242530f77..5d25a2d749 100644 --- a/test/CodeGen/X86/Stats/zero-remat.ll +++ b/test/CodeGen/X86/zero-remat.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 ; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 diff --git a/test/CodeGen/XCore/DbgValueOtherTargets.test b/test/CodeGen/XCore/DbgValueOtherTargets.test index 93ab7e0f15..7c2ecd0312 100644 --- a/test/CodeGen/XCore/DbgValueOtherTargets.test +++ b/test/CodeGen/XCore/DbgValueOtherTargets.test @@ -1 +1 @@ -; RUN: llc -O0 -march=xcore -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll +RUN: llc -O0 -march=xcore -asm-verbose < %S/../Inputs/DbgValueOtherTargets.ll | FileCheck %S/../Inputs/DbgValueOtherTargets.ll diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg index f8726af57f..8756f37fe8 100644 --- a/test/CodeGen/XCore/lit.local.cfg +++ b/test/CodeGen/XCore/lit.local.cfg @@ -1,4 +1,4 @@ -config.suffixes = ['.ll', '.c', '.cpp'] +config.suffixes = ['.ll', '.c', '.cpp', '.test'] targets = set(config.root.targets_to_build.split()) if not 'XCore' in targets: diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll index a2976a20de..5bfca21b3e 100644 --- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll +++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll @@ -1,10 +1,11 @@ ; RUN: llvm-as < %s | llvm-dis | FileCheck %s !dbg = !{!0} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !1, i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} -!1 = metadata !{i32 41, metadata !"/foo", metadata !"bar.cpp", null} +!0 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !1, i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 41, metadata !4} ; [ DW_TAG_file_type ] !2 = metadata !{i32 21, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_subroutine_type ] !3 = metadata !{null} +!4 = metadata !{metadata !"/foo", metadata !"bar.cpp"} define <{i32, i32}> @f1() { ; CHECK: !dbgx !1 diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll index 6efce1affb..13bd310395 100644 --- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll +++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll @@ -9,10 +9,10 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"fb.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, i32 ()* @foo, null, null, metadata !10} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !"fb.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9} diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll index 19be3f297f..83d6ac2822 100644 --- a/test/DebugInfo/2009-11-10-CurrentFn.ll +++ b/test/DebugInfo/2009-11-10-CurrentFn.ll @@ -12,10 +12,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"cf.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 139632)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 720913, i32 12, metadata !6, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 720942, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 720937, metadata !"cf.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null} diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll index 58953b00db..06c2367913 100644 --- a/test/DebugInfo/2010-03-24-MemberFn.ll +++ b/test/DebugInfo/2010-03-24-MemberFn.ll @@ -37,29 +37,31 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!5} -!24 = metadata !{metadata !3, metadata !12} !0 = metadata !{i32 786688, metadata !1, metadata !"s1", metadata !4, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ] !1 = metadata !{i32 786443, metadata !2, i32 3, i32 0} ; [ DW_TAG_lexical_block ] !2 = metadata !{i32 786443, metadata !3, i32 3, i32 0} ; [ DW_TAG_lexical_block ] -!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", metadata !4, i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ] -!4 = metadata !{i32 786473, metadata !"one.cc", metadata !"/tmp/", metadata !5} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 786449, i32 0, i32 4, metadata !"one.cc", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !24, null} ; [ DW_TAG_compile_unit ] -!6 = metadata !{i32 786453, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] +!3 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786449, i32 4, metadata !4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !24, null, metadata !""} ; [ DW_TAG_compile_unit ] +!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ] !7 = metadata !{metadata !8} -!8 = metadata !{i32 786468, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 786451, metadata !4, metadata !"S", metadata !10, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] -!10 = metadata !{i32 786473, metadata !"one.h", metadata !"/tmp/", metadata !5} ; [ DW_TAG_file_type ] +!8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] !11 = metadata !{metadata !12} -!12 = metadata !{i32 786478, i32 0, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", metadata !10, i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ] -!13 = metadata !{i32 786453, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{i32 786478, metadata !10, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!13 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ] !14 = metadata !{metadata !8, metadata !15} -!15 = metadata !{i32 786447, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ] !16 = metadata !{i32 3, i32 0, metadata !1, null} !17 = metadata !{i32 3, i32 0, metadata !3, null} !18 = metadata !{i32 786689, metadata !12, metadata !"this", metadata !10, i32 3, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ] -!19 = metadata !{i32 786470, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ] -!20 = metadata !{i32 786447, metadata !4, metadata !"", metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] +!19 = metadata !{i32 786470, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ] +!20 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] !21 = metadata !{i32 3, i32 0, metadata !12, null} !22 = metadata !{i32 3, i32 0, metadata !23, null} !23 = metadata !{i32 786443, metadata !12, i32 3, i32 0} ; [ DW_TAG_lexical_block ] +!24 = metadata !{metadata !3, metadata !12} +!25 = metadata !{metadata !"one.cc", metadata !"/tmp/"} +!26 = metadata !{metadata !"one.h", metadata !"/tmp/"} diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll index e665c84d58..dd98db9045 100644 --- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll +++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll @@ -55,18 +55,18 @@ entry: !0 = metadata !{i32 786688, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] !1 = metadata !{i32 786443, metadata !2, i32 15, i32 12} ; [ DW_TAG_lexical_block ] -!2 = metadata !{i32 786478, i32 0, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786478, metadata !3, metadata !"main", metadata !"main", metadata !"main", metadata !3, i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 786473, metadata !"one.cc", metadata !"/tmp", metadata !4} ; [ DW_TAG_file_type ] -!4 = metadata !{i32 786449, i32 0, i32 4, metadata !"one.cc", metadata !"/tmp", metadata !"clang 1.5", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !37, null} ; [ DW_TAG_compile_unit ] -!5 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{i32 786449, i32 4, metadata !3, metadata !"clang 1.5", i1 false, metadata !"", i32 0, null, null, metadata !37, null, metadata !""} ; [ DW_TAG_compile_unit ] +!5 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] !6 = metadata !{metadata !7} -!7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!8 = metadata !{i32 786434, metadata !3, metadata !"B", metadata !3, i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ] +!7 = metadata !{i32 786468, metadata !3, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786434, metadata !3, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null} ; [ DW_TAG_class_type ] !9 = metadata !{metadata !10} -!10 = metadata !{i32 786478, i32 0, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!10 = metadata !{i32 786478, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", metadata !3, i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !7, metadata !13} -!13 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ] !14 = metadata !{i32 16, i32 5, metadata !1, null} !15 = metadata !{i32 17, i32 3, metadata !1, null} !16 = metadata !{i32 18, i32 1, metadata !2, null} @@ -74,12 +74,12 @@ entry: !18 = metadata !{i32 4, i32 7, metadata !10, null} !19 = metadata !{i32 786688, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21, i32 0, null} ; [ DW_TAG_auto_variable ] !20 = metadata !{i32 786443, metadata !10, i32 4, i32 12} ; [ DW_TAG_lexical_block ] -!21 = metadata !{i32 786434, metadata !10, metadata !"A", metadata !3, i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ] +!21 = metadata !{i32 786434, metadata !3, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_class_type ] !22 = metadata !{metadata !23} -!23 = metadata !{i32 786478, i32 0, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ] -!24 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] +!23 = metadata !{i32 786478, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", metadata !3, i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ] +!24 = metadata !{i32 786453, metadata !3, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ] !25 = metadata !{metadata !7, metadata !26} -!26 = metadata !{i32 786447, metadata !3, metadata !"", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ] +!26 = metadata !{i32 786447, metadata !3, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ] !27 = metadata !{i32 9, i32 7, metadata !20, null} !28 = metadata !{i32 786688, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] !29 = metadata !{i32 10, i32 9, metadata !20, null} diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll index 9905431192..f9e90cd1b3 100644 --- a/test/DebugInfo/2010-04-19-FramePtr.ll +++ b/test/DebugInfo/2010-04-19-FramePtr.ll @@ -23,9 +23,9 @@ return: ; preds = %entry !9 = metadata !{metadata !1} !0 = metadata !{i32 2, i32 0, metadata !1, null} -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"a.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6} !6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll index 5fff4229be..75e4389afe 100644 --- a/test/DebugInfo/2010-05-10-MultipleCU.ll +++ b/test/DebugInfo/2010-05-10-MultipleCU.ll @@ -32,17 +32,17 @@ return: !0 = metadata !{i32 3, i32 0, metadata !1, null} !1 = metadata !{i32 786443, metadata !2, i32 2, i32 0} ; [ DW_TAG_lexical_block ] -!2 = metadata !{i32 786478, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786478, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", metadata !3, i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !3 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp/", metadata !4} ; [ DW_TAG_file_type ] -!4 = metadata !{i32 786449, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !16, null} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786449, i32 1, metadata !3, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !16, null, metadata !""} ; [ DW_TAG_compile_unit ] !5 = metadata !{i32 786453, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786468, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{i32 3, i32 0, metadata !9, null} !9 = metadata !{i32 786443, metadata !10, i32 2, i32 0} ; [ DW_TAG_lexical_block ] -!10 = metadata !{i32 786478, i32 0, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786478, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", metadata !11, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] !11 = metadata !{i32 786473, metadata !"b.c", metadata !"/tmp/", metadata !12} ; [ DW_TAG_file_type ] -!12 = metadata !{i32 786449, i32 0, i32 1, metadata !"b.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !17, null} ; [ DW_TAG_compile_unit ] +!12 = metadata !{i32 786449, i32 1, metadata !11, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ] !13 = metadata !{i32 786453, metadata !11, metadata !"", metadata !11, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null} ; [ DW_TAG_subroutine_type ] !14 = metadata !{metadata !15} !15 = metadata !{i32 786468, metadata !11, metadata !"int", metadata !11, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll index 0e80c1b9ae..e44362d412 100644 --- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll +++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll @@ -22,32 +22,32 @@ entry: } !llvm.dbg.cu = !{!2} -!25 = metadata !{metadata !0, metadata !6} -!26 = metadata !{metadata !16} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"bar.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 1, metadata !1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, null, null, metadata !25, metadata !26, metadata !""} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5, metadata !5} -!5 = metadata !{i32 786468, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !5} !9 = metadata !{i32 786689, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ] !10 = metadata !{i32 786688, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12, i32 0, null} ; [ DW_TAG_auto_variable ] -!11 = metadata !{i32 786443, metadata !0, i32 9, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!12 = metadata !{i32 786451, metadata !0, metadata !"X", metadata !1, i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ] +!11 = metadata !{i32 786443, metadata !1, metadata !0, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null} ; [ DW_TAG_structure_type ] !13 = metadata !{metadata !14, metadata !15} -!14 = metadata !{i32 786445, metadata !12, metadata !"a", metadata !1, i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] -!15 = metadata !{i32 786445, metadata !12, metadata !"b", metadata !1, i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] +!14 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"a", i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ] +!15 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"b", i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ] !16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ] !17 = metadata !{i32 15, i32 0, metadata !18, null} -!18 = metadata !{i32 786443, metadata !6, i32 14, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!18 = metadata !{i32 786443, metadata !1, metadata !6, i32 14, i32 0, i32 1} ; [ DW_TAG_lexical_block ] !19 = metadata !{i32 9, i32 0, metadata !0, metadata !17} !20 = metadata !{null} !21 = metadata !{i32 9, i32 0, metadata !11, metadata !17} !22 = metadata !{i32 11, i32 0, metadata !11, metadata !17} !23 = metadata !{i32 16, i32 0, metadata !18, null} !24 = metadata !{metadata !9, metadata !10} - +!25 = metadata !{metadata !0, metadata !6} +!26 = metadata !{metadata !16} +!27 = metadata !{metadata !"bar.c", metadata !"/tmp/"} diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll index b94f77558e..673c789fe6 100644 --- a/test/DebugInfo/AArch64/dwarfdump.ll +++ b/test/DebugInfo/AArch64/dwarfdump.ll @@ -22,12 +22,13 @@ attributes #0 = { nounwind } !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"tmp.c", metadata !"/home/tim/llvm/build", metadata !"clang version 3.3 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99] !1 = metadata !{i32 0} !2 = metadata !{metadata !3} -!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] -!4 = metadata !{i32 786473, metadata !"tmp.c", metadata !"/home/tim/llvm/build", null} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786478, metadata !4, metadata !"main", metadata !"main", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] +!4 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ] !5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !8 = metadata !{i32 2, i32 0, metadata !3, null} +!9 = metadata !{metadata !"tmp.c", metadata !"/home/tim/llvm/build"} diff --git a/test/DebugInfo/AArch64/variable-loc.ll b/test/DebugInfo/AArch64/variable-loc.ll index 9a7f7dba94..ac3037e04b 100644 --- a/test/DebugInfo/AArch64/variable-loc.ll +++ b/test/DebugInfo/AArch64/variable-loc.ll @@ -69,24 +69,24 @@ declare i32 @printf(i8*, ...) !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !11, metadata !14} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array] -!6 = metadata !{i32 786473, metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array] +!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9, metadata !10} -!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!11 = metadata !{i32 786478, i32 0, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array] +!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] +!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!11 = metadata !{i32 786478, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", metadata !6, i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array] !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{metadata !10, metadata !9, metadata !10} -!14 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] +!14 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main] !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{metadata !10} !17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19] -!18 = metadata !{i32 786443, metadata !14, i32 18, i32 16, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c] -!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] +!18 = metadata !{i32 786443, metadata !6, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c] +!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int] !20 = metadata !{i32 786465, i64 0, i64 99} ; [ DW_TAG_subrange_type ] [0, 99] !22 = metadata !{i32 19, i32 7, metadata !18, null} !23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20] @@ -95,3 +95,4 @@ declare i32 @printf(i8*, ...) !26 = metadata !{i32 23, i32 9, metadata !18, null} !27 = metadata !{i32 24, i32 3, metadata !18, null} !28 = metadata !{i32 26, i32 3, metadata !18, null} +!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"} diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll index a859bb2412..5169647fa4 100644 --- a/test/DebugInfo/X86/2010-04-13-PubType.ll +++ b/test/DebugInfo/X86/2010-04-13-PubType.ll @@ -29,22 +29,23 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!3} -!17 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"a.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 1, metadata !"a.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !17, null} ; [ DW_TAG_compile_unit ] -!4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", metadata !2, i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 1, metadata !2, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, metadata !""} ; [ DW_TAG_compile_unit ] +!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !7, metadata !9} -!6 = metadata !{i32 786468, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!7 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 786451, metadata !2, metadata !"X", metadata !2, i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ] -!9 = metadata !{i32 786447, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 786451, metadata !2, metadata !"Y", metadata !2, i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!7 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] +!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_structure_type ] !11 = metadata !{metadata !12} -!12 = metadata !{i32 786445, metadata !10, metadata !"x", metadata !2, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!12 = metadata !{i32 786445, metadata !18, metadata !10, metadata !"x", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] !13 = metadata !{i32 7, i32 0, metadata !1, null} !14 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ] !15 = metadata !{i32 7, i32 0, metadata !16, null} !16 = metadata !{i32 786443, metadata !1, i32 7, i32 0} ; [ DW_TAG_lexical_block ] +!17 = metadata !{metadata !1} +!18 = metadata !{metadata !"a.c", metadata !"/tmp/"} diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll index bc49735e07..d05dfc6c32 100644 --- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll +++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll @@ -12,12 +12,10 @@ entry: declare void @bar(i32) !llvm.dbg.cu = !{!2} -!10 = metadata !{metadata !0} -!11 = metadata !{metadata !5} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, null, null, metadata !10, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} !5 = metadata !{i32 786471, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ] @@ -25,3 +23,6 @@ declare void @bar(i32) !7 = metadata !{i32 786468, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !8 = metadata !{i32 3, i32 14, metadata !9, null} !9 = metadata !{i32 786443, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{metadata !0} +!11 = metadata !{metadata !5} +!12 = metadata !{metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D"} diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll index 94d6a19a29..ad55db05a7 100644 --- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll +++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll @@ -18,23 +18,24 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} !11 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !12 = metadata !{metadata !14} !14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB, null} ; [ DW_TAG_variable ] !15 = metadata !{i32 786688, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] -!16 = metadata !{i32 786443, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!16 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 9, i32 0} ; [ DW_TAG_lexical_block ] !17 = metadata !{i32 4, i32 9, metadata !16, null} !18 = metadata !{i32 4, i32 23, metadata !16, null} !19 = metadata !{i32 5, i32 5, metadata !16, null} +!20 = metadata !{metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo"} ; CHECK: DW_TAG_variable [3] ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000043] = "GLB") diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll index ba8a763821..e248aa6029 100644 --- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll +++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll @@ -88,50 +88,50 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", metadata !"clang version 3.1 (trunk 146596)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 720913, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !9} -!5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ] -!6 = metadata !{i32 720937, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ] +!6 = metadata !{i32 720937, metadata !82} ; [ DW_TAG_file_type ] !7 = metadata !{metadata !8, metadata !19, metadata !21} -!8 = metadata !{i32 720909, metadata !5, metadata !"b", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] -!9 = metadata !{i32 720898, null, metadata !"baz", metadata !6, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ] +!8 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] +!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ] !10 = metadata !{metadata !11, metadata !13} -!11 = metadata !{i32 720909, metadata !9, metadata !"h", metadata !6, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] -!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!13 = metadata !{i32 720942, i32 0, metadata !9, metadata !"baz", metadata !"baz", metadata !"", metadata !6, i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] +!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!13 = metadata !{i32 720942, metadata !6, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ] !14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !15 = metadata !{null, metadata !16, metadata !12} !16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ] !17 = metadata !{metadata !18} !18 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!19 = metadata !{i32 720909, metadata !5, metadata !"b_ref", metadata !6, i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ] +!19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ] !20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] -!21 = metadata !{i32 720942, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 720942, metadata !6, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ] !22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !23 = metadata !{null, metadata !24, metadata !12} !24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ] !25 = metadata !{metadata !26} !26 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46} -!29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ] +!29 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ] !30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !31 = metadata !{metadata !12, metadata !12, metadata !32} -!32 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] -!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] -!34 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] +!33 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] +!34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] !35 = metadata !{metadata !36} !36 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!37 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ] !38 = metadata !{metadata !39} !39 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!40 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ] +!40 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ] !41 = metadata !{metadata !42} !42 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!43 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ] +!43 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ] !44 = metadata !{metadata !45} !45 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!46 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ] +!46 = metadata !{i32 720942, metadata !6, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ] !47 = metadata !{metadata !48} !48 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] @@ -167,3 +167,4 @@ entry: !79 = metadata !{i32 6, i32 23, metadata !46, null} !80 = metadata !{i32 6, i32 24, metadata !81, null} !81 = metadata !{i32 720907, metadata !46, i32 6, i32 23, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] +!82 = metadata !{metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref"} diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll index 3396936983..84e3f63097 100644 --- a/test/DebugInfo/X86/DW_AT_byte_size.ll +++ b/test/DebugInfo/X86/DW_AT_byte_size.ll @@ -24,21 +24,22 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 150996)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14, i32 3} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !10} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] !12 = metadata !{metadata !13} -!13 = metadata !{i32 786445, metadata !11, metadata !"b", metadata !6, i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] +!13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] !14 = metadata !{metadata !15} !15 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !17 = metadata !{i32 3, i32 13, metadata !5, null} !18 = metadata !{i32 4, i32 3, metadata !19, null} -!19 = metadata !{i32 786443, metadata !5, i32 3, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!19 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ] +!20 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"} diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll index ec84b489ba..356360b098 100644 --- a/test/DebugInfo/X86/DW_AT_location-reference.ll +++ b/test/DebugInfo/X86/DW_AT_location-reference.ll @@ -86,16 +86,14 @@ declare i32 @g(i32, i32) declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!21 = metadata !{metadata !0} -!22 = metadata !{metadata !5} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation", metadata !"clang version 3.0 (trunk)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !21, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"f", metadata !"f", metadata !"", metadata !1, i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, i32 12, metadata !1, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, null, null, metadata !21, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{null} !5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ] -!6 = metadata !{i32 786443, metadata !0, i32 4, i32 14, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!6 = metadata !{i32 786443, metadata !1, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ] !7 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !8 = metadata !{i32 6, i32 3, metadata !6, null} !9 = metadata !{metadata !"int", metadata !10} @@ -110,3 +108,6 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !18 = metadata !{i32 11, i32 3, metadata !6, null} !19 = metadata !{i32 12, i32 3, metadata !6, null} !20 = metadata !{i32 13, i32 1, metadata !6, null} +!21 = metadata !{metadata !0} +!22 = metadata !{metadata !5} +!23 = metadata !{metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation"} diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll index 789fcebd0c..a3ad26cf82 100644 --- a/test/DebugInfo/X86/DW_AT_object_pointer.ll +++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll @@ -47,38 +47,39 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests", metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !10, metadata !20} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", metadata !6, i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo] -!6 = metadata !{i32 786473, metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo] +!6 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!10 = metadata !{i32 786478, i32 0, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", metadata !6, i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A] !11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{null, metadata !13} !13 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] -!14 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ] +!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [from ] !15 = metadata !{metadata !16, metadata !17} -!16 = metadata !{i32 786445, metadata !14, metadata !"m_a", metadata !6, i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int] -!17 = metadata !{i32 786478, i32 0, metadata !14, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A] +!16 = metadata !{i32 786445, metadata !37, metadata !14, metadata !"m_a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int] +!17 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A] !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] -!20 = metadata !{i32 786478, i32 0, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", metadata !6, i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A] -!36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7] -!35 = metadata !{i32 7, i32 0, metadata !5, null} +!20 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A] !21 = metadata !{i32 786688, metadata !22, metadata !"a", metadata !6, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 8] -!22 = metadata !{i32 786443, metadata !5, i32 7, i32 11, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp] +!22 = metadata !{i32 786443, metadata !6, metadata !5, i32 7, i32 11, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp] !23 = metadata !{i32 8, i32 5, metadata !22, null} !24 = metadata !{i32 8, i32 6, metadata !22, null} !25 = metadata !{i32 9, i32 3, metadata !22, null} !26 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3] -!27 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] !28 = metadata !{i32 3, i32 3, metadata !10, null} !29 = metadata !{i32 3, i32 18, metadata !10, null} !30 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3] !31 = metadata !{i32 3, i32 3, metadata !20, null} !32 = metadata !{i32 3, i32 9, metadata !33, null} -!33 = metadata !{i32 786443, metadata !20, i32 3, i32 7, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp] +!33 = metadata !{i32 786443, metadata !6, metadata !20, i32 3, i32 7, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp] !34 = metadata !{i32 3, i32 18, metadata !33, null} +!35 = metadata !{i32 7, i32 0, metadata !5, null} +!36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7] +!37 = metadata !{metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests"} diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll index e1d84dbe6e..07849f3522 100644 --- a/test/DebugInfo/X86/DW_AT_specification.ll +++ b/test/DebugInfo/X86/DW_AT_specification.ll @@ -16,17 +16,17 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"<unknown>", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 720937, metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16, i32 4} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!11 = metadata !{i32 720942, i32 0, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ] -!12 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ] +!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ] +!12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ] !13 = metadata !{metadata !11} !14 = metadata !{metadata !15} !15 = metadata !{i32 720932} ; [ DW_TAG_base_type ] @@ -34,7 +34,8 @@ entry: !17 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !18 = metadata !{metadata !20} !20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x, null} ; [ DW_TAG_variable ] -!21 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] -!22 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!21 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] +!22 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !25 = metadata !{i32 6, i32 1, metadata !26, null} !26 = metadata !{i32 786443, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"} diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll index 010d66eeb3..f60175fb69 100644 --- a/test/DebugInfo/X86/DW_TAG_friend.ll +++ b/test/DebugInfo/X86/DW_TAG_friend.ll @@ -17,29 +17,30 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !17} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] -!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] +!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] !8 = metadata !{metadata !9, metadata !11} -!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!11 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ] +!9 = metadata !{i32 786445, metadata !28, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ] !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !13 = metadata !{null, metadata !14} !14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ] !15 = metadata !{metadata !16} !16 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b, null} ; [ DW_TAG_variable ] -!18 = metadata !{i32 786434, null, metadata !"B", metadata !6, i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ] +!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ] !19 = metadata !{metadata !20, metadata !21, metadata !27} -!20 = metadata !{i32 786445, metadata !18, metadata !"b", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] -!21 = metadata !{i32 786478, i32 0, metadata !18, metadata !"B", metadata !"B", metadata !"", metadata !6, i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 786445, metadata !28, metadata !18, metadata !"b", i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] +!21 = metadata !{i32 786478, metadata !6, metadata !18, metadata !"B", metadata !"B", metadata !"", i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ] !22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !23 = metadata !{null, metadata !24} !24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ] !25 = metadata !{metadata !26} !26 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ] +!28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll index 85027aa72f..a8f6cca750 100644 --- a/test/DebugInfo/X86/aligned_stack_var.ll +++ b/test/DebugInfo/X86/aligned_stack_var.ll @@ -26,15 +26,15 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/debuginfo", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null} !9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] -!10 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!10 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] !11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !12 = metadata !{i32 2, i32 7, metadata !10, null} !13 = metadata !{i32 3, i32 1, metadata !10, null} diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll index 0f21708078..fadea775aa 100644 --- a/test/DebugInfo/X86/block-capture.ll +++ b/test/DebugInfo/X86/block-capture.ll @@ -62,55 +62,55 @@ declare i32 @__objc_personality_v0(...) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!35, !36, !37, !38} -!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 151227)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 5} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null, metadata !9} -!9 = metadata !{i32 786454, null, metadata !"dispatch_block_t", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] -!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_generic", metadata !6, i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!9 = metadata !{i32 786454, metadata !63, null, metadata !"dispatch_block_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] +!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ] !12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19} -!13 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ] -!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] -!15 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ] -!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!17 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ] -!18 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ] -!19 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ] -!20 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] -!21 = metadata !{i32 786451, metadata !6, metadata !"__block_descriptor", metadata !6, i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!13 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ] +!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ] +!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ] +!18 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ] +!19 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ] +!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] +!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ] !22 = metadata !{metadata !23, metadata !25} -!23 = metadata !{i32 786445, metadata !6, metadata !"reserved", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] -!24 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] -!25 = metadata !{i32 786445, metadata !6, metadata !"Size", metadata !6, i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ] +!23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] +!24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!25 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ] !26 = metadata !{metadata !27} !27 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!28 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", metadata !6, i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ] +!28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26, i32 7} ; [ DW_TAG_subprogram ] !29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !30 = metadata !{null, metadata !14} -!31 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", metadata !6, i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ] +!31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ] !32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !33 = metadata !{null, metadata !14, metadata !14} -!34 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", metadata !6, i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ] +!34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26, i32 10} ; [ DW_TAG_subprogram ] !35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} !36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} !37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} !38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} !39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!40 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ] -!41 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_1", metadata !6, i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ] +!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ] !42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50} -!43 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ] -!44 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ] -!45 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ] -!46 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ] -!47 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ] -!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] -!49 = metadata !{i32 786451, null, metadata !"__block_descriptor_withcopydispose", metadata !6, i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] -!50 = metadata !{i32 786445, metadata !6, metadata !"block", metadata !6, i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ] +!43 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ] +!44 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ] +!45 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ] +!46 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ] +!47 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ] +!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] +!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] +!50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ] !51 = metadata !{i32 7, i32 18, metadata !28, null} !52 = metadata !{i32 7, i32 19, metadata !28, null} !53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] @@ -123,3 +123,4 @@ declare i32 @__objc_personality_v0(...) !60 = metadata !{i32 786443, metadata !57, i32 9, i32 35, metadata !6, i32 3} ; [ DW_TAG_lexical_block ] !61 = metadata !{i32 10, i32 21, metadata !28, null} !62 = metadata !{i32 9, i32 20, metadata !56, null} +!63 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"} diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll index 5c466204de..48e1defd4c 100644 --- a/test/DebugInfo/X86/concrete_out_of_line.ll +++ b/test/DebugInfo/X86/concrete_out_of_line.ll @@ -34,41 +34,41 @@ declare void @_Z8moz_freePv(i8*) !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"nsAutoRefCnt.cpp", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", metadata !"clang version 3.1 ()", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31} -!5 = metadata !{i32 720942, i32 0, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 720937, metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !10} -!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786451, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!12 = metadata !{i32 720942, i32 0, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ] -!13 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] +!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!12 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ] +!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] !14 = metadata !{metadata !12, metadata !15} -!15 = metadata !{i32 720942, i32 0, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", metadata !6, i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ] !16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !17 = metadata !{null, metadata !10} !18 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !20 = metadata !{metadata !22} !22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!23 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] +!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] !24 = metadata !{metadata !26} !26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!27 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] +!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] !28 = metadata !{metadata !30} !30 = metadata !{i32 786689, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!31 = metadata !{i32 720942, i32 0, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ] +!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ] !32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !33 = metadata !{metadata !9, metadata !34, metadata !9} !34 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ] -!35 = metadata !{i32 786451, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] -!36 = metadata !{i32 720942, i32 0, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ] -!37 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ] +!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!36 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ] +!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ] !38 = metadata !{metadata !39, metadata !40, metadata !36} -!39 = metadata !{i32 786445, metadata !37, metadata !"mValue", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] -!40 = metadata !{i32 720942, i32 0, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", metadata !6, i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] +!39 = metadata !{i32 786445, metadata !59, metadata !37, metadata !"mValue", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] +!40 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] !41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !42 = metadata !{null, metadata !34} !43 = metadata !{metadata !45, metadata !46} @@ -77,11 +77,12 @@ declare void @_Z8moz_freePv(i8*) !47 = metadata !{metadata !49} !49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null, null} ; [ DW_TAG_variable ] !50 = metadata !{i32 5, i32 5, metadata !51, metadata !52} -!51 = metadata !{i32 786443, metadata !31, i32 4, i32 29, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] +!51 = metadata !{i32 786443, metadata !6, metadata !31, i32 4, i32 29, i32 2} ; [ DW_TAG_lexical_block ] !52 = metadata !{i32 15, i32 0, metadata !53, null} -!53 = metadata !{i32 786443, metadata !5, i32 14, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!53 = metadata !{i32 786443, metadata !6, metadata !5, i32 14, i32 34, i32 0} ; [ DW_TAG_lexical_block ] !54 = metadata !{i32 19, i32 3, metadata !55, metadata !56} -!55 = metadata !{i32 786443, metadata !27, i32 18, i32 41, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] +!55 = metadata !{i32 786443, metadata !6, metadata !27, i32 18, i32 41, i32 1} ; [ DW_TAG_lexical_block ] !56 = metadata !{i32 18, i32 41, metadata !23, metadata !52} !57 = metadata !{i32 19, i32 3, metadata !55, metadata !58} !58 = metadata !{i32 18, i32 41, metadata !23, null} +!59 = metadata !{metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src"} diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll index 27273c7671..e7a554ff86 100644 --- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll +++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll @@ -46,34 +46,31 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!39 = metadata !{metadata !0, metadata !6} -!40 = metadata !{metadata !19} -!41 = metadata !{metadata !9, metadata !18} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"nm2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"nm2.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 125693)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40, null} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, null, null, metadata !39, metadata !40, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} -!5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!6 = metadata !{i32 786478, i32 0, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", metadata !1, i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ] -!7 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foobar} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null} !9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 7, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] -!10 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786454, metadata !2, metadata !"S1", metadata !1, i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] -!12 = metadata !{i32 786451, metadata !2, metadata !"S1", metadata !1, i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] +!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, i32 0, metadata !13, i32 0, i32 0} ; [ DW_TAG_structure_type ] !13 = metadata !{metadata !14, metadata !17} -!14 = metadata !{i32 786445, metadata !1, metadata !"m", metadata !1, i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] -!15 = metadata !{i32 786447, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] -!16 = metadata !{i32 786468, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] -!17 = metadata !{i32 786445, metadata !1, metadata !"nums", metadata !1, i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ] +!14 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"m", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] +!15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] +!16 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!17 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"nums", i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ] !18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 7, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] !19 = metadata !{i32 786484, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p, null} ; [ DW_TAG_variable ] !20 = metadata !{i32 7, i32 13, metadata !0, null} !21 = metadata !{i32 7, i32 21, metadata !0, null} !22 = metadata !{i32 9, i32 3, metadata !23, null} -!23 = metadata !{i32 786443, metadata !0, i32 8, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!23 = metadata !{i32 786443, metadata !1, metadata !0, i32 8, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !24 = metadata !{metadata !"int", metadata !25} !25 = metadata !{metadata !"omnipotent char", metadata !26} !26 = metadata !{metadata !"Simple C/C++ TBAA", null} @@ -83,9 +80,13 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !30 = metadata !{%struct.S1* @p} !31 = metadata !{i32 7, i32 13, metadata !0, metadata !32} !32 = metadata !{i32 16, i32 3, metadata !33, null} -!33 = metadata !{i32 786443, metadata !6, i32 15, i32 15, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!33 = metadata !{i32 786443, metadata !1, metadata !6, i32 15, i32 15, i32 1} ; [ DW_TAG_lexical_block ] !34 = metadata !{i32 1} !35 = metadata !{i32 7, i32 21, metadata !0, metadata !32} !36 = metadata !{i32 9, i32 3, metadata !23, metadata !32} !37 = metadata !{i32 10, i32 3, metadata !23, metadata !32} !38 = metadata !{i32 17, i32 1, metadata !33, null} +!39 = metadata !{metadata !0, metadata !6} +!40 = metadata !{metadata !19} +!41 = metadata !{metadata !9, metadata !18} +!42 = metadata !{metadata !"nm2.c", metadata !"/private/tmp"} diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll new file mode 100644 index 0000000000..77e02c62aa --- /dev/null +++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll @@ -0,0 +1,106 @@ +; RUN: llc -mtriple x86_64-apple-darwin -filetype=obj -o %t.o < %s +; RUN: llvm-dwarfdump %t.o | FileCheck %s +; +; Test that DW_AT_location is generated for a captured "self" inside a +; block. +; +; This test is split into two parts, the frontend part can be found at +; llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m +; +; CHECK: {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}} +; CHECK: DW_TAG_variable +; CHECK: {{.*}}DW_AT_name{{.*}}"self"{{.*}} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_location +; +; CHECK: {{.*}}DW_AT_name{{.*}}_block_invoke{{.*}} +; CHECK: DW_TAG_variable +; CHECK: {{.*}}DW_AT_name{{.*}}"self"{{.*}} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_location +; +; Generated (and then reduced) from +; ---------------------------------------------------------------------- +; +; @class T; +; @interface S +; @end +; @interface Mode +; -(int) count; +; @end +; @interface Context +; @end +; @interface ViewController +; @property (nonatomic, readwrite, strong) Context *context; +; @end +; typedef enum { +; Unknown = 0, +; } State; +; @interface Main : ViewController +; { +; T * t1; +; T * t2; +; } +; @property(readwrite, nonatomic) State state; +; @end +; @implementation Main +; - (id) initWithContext:(Context *) context +; { +; t1 = [self.context withBlock:^(id obj){ +; id *mode1; +; t2 = [mode1 withBlock:^(id object){ +; Mode *mode2 = object; +; if ([mode2 count] != 0) { +; self.state = 0; +; } +; }]; +; }]; +; } +; @end +; ---------------------------------------------------------------------- +; ModuleID = 'llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m' +%0 = type opaque +%struct.__block_descriptor = type { i64, i64 } +declare void @llvm.dbg.declare(metadata, metadata) #1 +define internal void @"__24-[Main initWithContext:]_block_invoke"(i8* %.block_descriptor, i8* %obj) #0 { + %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !84 + %block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !84 + call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block}, metadata !86), !dbg !87 + ret void, !dbg !87 +} + +define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_descriptor, i8* %object) #0 { + %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103 + %block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !103 + call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block}, metadata !105), !dbg !106 + ret void, !dbg !106 +} + +!llvm.dbg.cu = !{!0} +!0 = metadata !{i32 786449, i32 16, metadata !1, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC] +!1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ] +!2 = metadata !{metadata !3} +!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [from ] +!4 = metadata !{} +!15 = metadata !{i32 0} +!23 = metadata !{metadata !38, metadata !42} +!27 = metadata !{i32 786454, metadata !107, null, metadata !"id", i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ] +!28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object] +!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ] +!30 = metadata !{metadata !31} +!31 = metadata !{i32 786445, metadata !107, metadata !29, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ] +!32 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class] +!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ] +!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, i32 0, i32 0, i32 16} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [fwd] [from ] +!38 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke] +!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!40 = metadata !{null, metadata !41, metadata !27} +!41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2] +!84 = metadata !{i32 33, i32 0, metadata !38, null} +!86 = metadata !{i32 786688, metadata !38, metadata !"self", metadata !1, i32 41, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 41] +!87 = metadata !{i32 41, i32 0, metadata !38, null} +!103 = metadata !{i32 35, i32 0, metadata !42, null} +!105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 40] +!106 = metadata !{i32 40, i32 0, metadata !42, null} +!107 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", metadata !""} diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll new file mode 100644 index 0000000000..36ab611008 --- /dev/null +++ b/test/DebugInfo/X86/debug-info-blocks.ll @@ -0,0 +1,372 @@ +; RUN: llc -mtriple x86_64-apple-darwin -filetype=obj -o %t.o < %s +; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s + +; Generated from llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m +; rdar://problem/9279956 +; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ) + +; CHECK: DW_AT_name{{.*}}_block_invoke +; CHECK-NOT: DW_TAG_subprogram +; CHECK: DW_TAG_formal_parameter +; CHECK-NOT: DW_TAG +; CHECK: .block_descriptor +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_location +; CHECK-NOT: DW_TAG_subprogram +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name{{.*}}"self" +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_type{{.*}}{[[APTR:.*]]} +; CHECK-NOT: DW_TAG +; CHECK: DW_AT_artificial +; CHECK-NOT: DW_TAG +; 0x06 = DW_OP_deref +; 0x23 = DW_OP_uconst +; 0x91 = DW_OP_fbreg +; CHECK: DW_AT_location{{.*}}91 {{[0-9]+}} 06 23 {{[0-9]+}} ) +; CHECK: DW_TAG_structure_type +; CHECK: [[A:.*]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_APPLE_objc_complete_type +; CHECK-NEXT: DW_AT_name{{.*}}"A" +; CHECK: [[APTR]]: DW_TAG_pointer_type [5] +; CHECK-NEXT: {[[A]]} + + +; ModuleID = 'llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin" + +%0 = type opaque +%1 = type opaque +%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* } +%struct._objc_cache = type opaque +%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* } +%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] } +%struct._objc_method = type { i8*, i8*, i8* } +%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] } +%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32, i8** } +%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] } +%struct._prop_t = type { i8*, i8* } +%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] } +%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 } +%struct._message_ref_t = type { i8*, i8* } +%struct._objc_super = type { i8*, i8* } +%struct.__block_descriptor = type { i64, i64 } +%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* } + +@"OBJC_CLASS_$_A" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_A", %struct._class_t* @"OBJC_CLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_CLASS_RO_$_A" }, section "__DATA, __objc_data", align 8 +@"\01L_OBJC_CLASSLIST_SUP_REFS_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_A", section "__DATA, __objc_superrefs, regular, no_dead_strip", align 8 +@"\01L_OBJC_METH_VAR_NAME_" = internal global [5 x i8] c"init\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@"\01L_OBJC_SELECTOR_REFERENCES_" = internal externally_initialized global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" +@"OBJC_CLASS_$_NSMutableDictionary" = external global %struct._class_t +@"\01L_OBJC_CLASSLIST_REFERENCES_$_" = internal global %struct._class_t* @"OBJC_CLASS_$_NSMutableDictionary", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8 +@"\01L_OBJC_METH_VAR_NAME_1" = internal global [6 x i8] c"alloc\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@"\01l_objc_msgSend_fixup_alloc" = weak hidden global { i8* (i8*, %struct._message_ref_t*, ...)*, i8* } { i8* (i8*, %struct._message_ref_t*, ...)* @objc_msgSend_fixup, i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0) }, section "__DATA, __objc_msgrefs, coalesced", align 16 +@"\01L_OBJC_METH_VAR_NAME_2" = internal global [6 x i8] c"count\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@"\01l_objc_msgSend_fixup_count" = weak hidden global { i8* (i8*, %struct._message_ref_t*, ...)*, i8* } { i8* (i8*, %struct._message_ref_t*, ...)* @objc_msgSend_fixup, i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0) }, section "__DATA, __objc_msgrefs, coalesced", align 16 +@"OBJC_IVAR_$_A.ivar" = global i64 0, section "__DATA, __objc_ivar", align 8 +@_NSConcreteStackBlock = external global i8* +@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1 +@__block_descriptor_tmp = internal constant { i64, i64, i8*, i8*, i8*, i64 } { i64 0, i64 40, i8* bitcast (void (i8*, i8*)* @__copy_helper_block_ to i8*), i8* bitcast (void (i8*)* @__destroy_helper_block_ to i8*), i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i64 256 } +@_objc_empty_cache = external global %struct._objc_cache +@_objc_empty_vtable = external global i8* (i8*, i8*)* +@"OBJC_METACLASS_$_NSObject" = external global %struct._class_t +@"\01L_OBJC_CLASS_NAME_" = internal global [2 x i8] c"A\00", section "__TEXT,__objc_classname,cstring_literals", align 1 +@"\01l_OBJC_METACLASS_RO_$_A" = internal global %struct._class_ro_t { i32 1, i32 40, i32 40, i8* null, i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* null, %struct._objc_protocol_list* null, %struct._ivar_list_t* null, i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8 +@"OBJC_METACLASS_$_A" = global %struct._class_t { %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._class_t* @"OBJC_METACLASS_$_NSObject", %struct._objc_cache* @_objc_empty_cache, i8* (i8*, i8*)** @_objc_empty_vtable, %struct._class_ro_t* @"\01l_OBJC_METACLASS_RO_$_A" }, section "__DATA, __objc_data", align 8 +@"OBJC_CLASS_$_NSObject" = external global %struct._class_t +@"\01L_OBJC_METH_VAR_TYPE_" = internal global [8 x i8] c"@16@0:8\00", section "__TEXT,__objc_methtype,cstring_literals", align 1 +@"\01l_OBJC_$_INSTANCE_METHODS_A" = internal global { i32, i32, [1 x %struct._objc_method] } { i32 24, i32 1, [1 x %struct._objc_method] [%struct._objc_method { i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast (i8* (%0*, i8*)* @"\01-[A init]" to i8*) }] }, section "__DATA, __objc_const", align 8 +@"\01L_OBJC_METH_VAR_NAME_3" = internal global [5 x i8] c"ivar\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@"\01L_OBJC_METH_VAR_TYPE_4" = internal global [2 x i8] c"i\00", section "__TEXT,__objc_methtype,cstring_literals", align 1 +@"\01l_OBJC_$_INSTANCE_VARIABLES_A" = internal global { i32, i32, [1 x %struct._ivar_t] } { i32 32, i32 1, [1 x %struct._ivar_t] [%struct._ivar_t { i64* @"OBJC_IVAR_$_A.ivar", i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i32 2, i32 4 }] }, section "__DATA, __objc_const", align 8 +@"\01l_OBJC_CLASS_RO_$_A" = internal global %struct._class_ro_t { i32 0, i32 0, i32 4, i8* null, i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct.__method_list_t* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to %struct.__method_list_t*), %struct._objc_protocol_list* null, %struct._ivar_list_t* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to %struct._ivar_list_t*), i8* null, %struct._prop_list_t* null }, section "__DATA, __objc_const", align 8 +@"\01L_OBJC_CLASSLIST_REFERENCES_$_5" = internal global %struct._class_t* @"OBJC_CLASS_$_A", section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8 +@"\01L_OBJC_LABEL_CLASS_$" = internal global [1 x i8*] [i8* bitcast (%struct._class_t* @"OBJC_CLASS_$_A" to i8*)], section "__DATA, __objc_classlist, regular, no_dead_strip", align 8 +@llvm.used = appending global [14 x i8*] [i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_" to i8*), i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_1", i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @"\01L_OBJC_METH_VAR_NAME_2", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @"\01L_OBJC_METH_VAR_TYPE_", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01l_OBJC_$_INSTANCE_METHODS_A" to i8*), i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_3", i32 0, i32 0), i8* getelementptr inbounds ([2 x i8]* @"\01L_OBJC_METH_VAR_TYPE_4", i32 0, i32 0), i8* bitcast ({ i32, i32, [1 x %struct._ivar_t] }* @"\01l_OBJC_$_INSTANCE_VARIABLES_A" to i8*), i8* bitcast (%struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5" to i8*), i8* bitcast ([1 x i8*]* @"\01L_OBJC_LABEL_CLASS_$" to i8*)], section "llvm.metadata" + +define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 { + %1 = alloca %0*, align 8 + %2 = alloca i8*, align 8 + %3 = alloca %struct._objc_super + %4 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8 + store %0* %self, %0** %1, align 8 + call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !60), !dbg !62 + store i8* %_cmd, i8** %2, align 8 + call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !63), !dbg !62 + %5 = load %0** %1, !dbg !65 + %6 = bitcast %0* %5 to i8*, !dbg !65 + %7 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 0, !dbg !65 + store i8* %6, i8** %7, !dbg !65 + %8 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_", !dbg !65 + %9 = bitcast %struct._class_t* %8 to i8*, !dbg !65 + %10 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 1, !dbg !65 + store i8* %9, i8** %10, !dbg !65 + %11 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !65, !invariant.load !67 + %12 = call i8* bitcast (i8* (%struct._objc_super*, i8*, ...)* @objc_msgSendSuper2 to i8* (%struct._objc_super*, i8*)*)(%struct._objc_super* %3, i8* %11), !dbg !65 + %13 = bitcast i8* %12 to %0*, !dbg !65 + store %0* %13, %0** %1, align 8, !dbg !65 + %14 = icmp ne %0* %13, null, !dbg !65 + br i1 %14, label %15, label %24, !dbg !65 + +; <label>:15 ; preds = %0 + %16 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 0, !dbg !68 + store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %16, !dbg !68 + %17 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 1, !dbg !68 + store i32 -1040187392, i32* %17, !dbg !68 + %18 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 2, !dbg !68 + store i32 0, i32* %18, !dbg !68 + %19 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 3, !dbg !68 + store i8* bitcast (void (i8*)* @"__9-[A init]_block_invoke" to i8*), i8** %19, !dbg !68 + %20 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 4, !dbg !68 + store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i64 }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %20, !dbg !68 + %21 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !68 + %22 = load %0** %1, align 8, !dbg !68 + store %0* %22, %0** %21, align 8, !dbg !68 + %23 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4 to void ()*, !dbg !68 + call void @run(void ()* %23), !dbg !68 + br label %24, !dbg !70 + +; <label>:24 ; preds = %15, %0 + %25 = load %0** %1, align 8, !dbg !71 + %26 = bitcast %0* %25 to i8*, !dbg !71 + ret i8* %26, !dbg !71 +} + +declare void @llvm.dbg.declare(metadata, metadata) #1 + +declare i8* @objc_msgSendSuper2(%struct._objc_super*, i8*, ...) + +define internal void @run(void ()* %block) #0 { + %1 = alloca void ()*, align 8 + store void ()* %block, void ()** %1, align 8 + call void @llvm.dbg.declare(metadata !{void ()** %1}, metadata !72), !dbg !73 + %2 = load void ()** %1, align 8, !dbg !74 + %3 = bitcast void ()* %2 to %struct.__block_literal_generic*, !dbg !74 + %4 = getelementptr inbounds %struct.__block_literal_generic* %3, i32 0, i32 3, !dbg !74 + %5 = bitcast %struct.__block_literal_generic* %3 to i8*, !dbg !74 + %6 = load i8** %4, !dbg !74 + %7 = bitcast i8* %6 to void (i8*)*, !dbg !74 + call void %7(i8* %5), !dbg !74 + ret void, !dbg !75 +} + +define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 { + %1 = alloca i8*, align 8 + %2 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, align 8 + %d = alloca %1*, align 8 + store i8* %.block_descriptor, i8** %1, align 8 + %3 = load i8** %1 + call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !76), !dbg !88 + call void @llvm.dbg.declare(metadata !{i8* %.block_descriptor}, metadata !76), !dbg !88 + %4 = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !88 + store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2, align 8, !dbg !88 + %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !88 + call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2}, metadata !89), !dbg !90 + call void @llvm.dbg.declare(metadata !{%1** %d}, metadata !91), !dbg !100 + %6 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", !dbg !100 + %7 = bitcast %struct._class_t* %6 to i8*, !dbg !100 + %8 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !100 + %9 = bitcast i8* %8 to i8* (i8*, i8*)*, !dbg !100 + %10 = call i8* %9(i8* %7, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !100 + %11 = bitcast i8* %10 to %1*, !dbg !100 + %12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !100, !invariant.load !67 + %13 = bitcast %1* %11 to i8*, !dbg !100 + %14 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %13, i8* %12), !dbg !100 + %15 = bitcast i8* %14 to %1*, !dbg !100 + store %1* %15, %1** %d, align 8, !dbg !100 + %16 = load %1** %d, align 8, !dbg !101 + %17 = bitcast %1* %16 to i8*, !dbg !101 + %18 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to %struct._message_ref_t*), i32 0, i32 0), !dbg !101 + %19 = bitcast i8* %18 to i32 (i8*, i8*)*, !dbg !101 + %20 = call i32 %19(i8* %17, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to i8*)), !dbg !101 + %21 = add nsw i32 42, %20, !dbg !101 + %22 = load %0** %5, align 8, !dbg !101 + %23 = load i64* @"OBJC_IVAR_$_A.ivar", !dbg !101, !invariant.load !67 + %24 = bitcast %0* %22 to i8*, !dbg !101 + %25 = getelementptr inbounds i8* %24, i64 %23, !dbg !101 + %26 = bitcast i8* %25 to i32*, !dbg !101 + store i32 %21, i32* %26, align 4, !dbg !101 + ret void, !dbg !90 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +declare i8* @objc_msgSend_fixup(i8*, %struct._message_ref_t*, ...) + +declare i8* @objc_msgSend(i8*, i8*, ...) #2 + +define internal void @__copy_helper_block_(i8*, i8*) { + %3 = alloca i8*, align 8 + %4 = alloca i8*, align 8 + store i8* %0, i8** %3, align 8 + call void @llvm.dbg.declare(metadata !{i8** %3}, metadata !102), !dbg !103 + store i8* %1, i8** %4, align 8 + call void @llvm.dbg.declare(metadata !{i8** %4}, metadata !104), !dbg !103 + %5 = load i8** %4, !dbg !103 + %6 = bitcast i8* %5 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103 + %7 = load i8** %3, !dbg !103 + %8 = bitcast i8* %7 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103 + %9 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %6, i32 0, i32 5, !dbg !103 + %10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %8, i32 0, i32 5, !dbg !103 + %11 = load %0** %9, !dbg !103 + %12 = bitcast %0* %11 to i8*, !dbg !103 + %13 = bitcast %0** %10 to i8*, !dbg !103 + call void @_Block_object_assign(i8* %13, i8* %12, i32 3) #3, !dbg !103 + ret void, !dbg !103 +} + +declare void @_Block_object_assign(i8*, i8*, i32) + +define internal void @__destroy_helper_block_(i8*) { + %2 = alloca i8*, align 8 + store i8* %0, i8** %2, align 8 + call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !105), !dbg !106 + %3 = load i8** %2, !dbg !106 + %4 = bitcast i8* %3 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !106 + %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !106 + %6 = load %0** %5, !dbg !106 + %7 = bitcast %0* %6 to i8*, !dbg !106 + call void @_Block_object_dispose(i8* %7, i32 3) #3, !dbg !106 + ret void, !dbg !106 +} + +declare void @_Block_object_dispose(i8*, i32) + +define i32 @main() #0 { + %1 = alloca i32, align 4 + %a = alloca %0*, align 8 + store i32 0, i32* %1 + call void @llvm.dbg.declare(metadata !{%0** %a}, metadata !107), !dbg !108 + %2 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5", !dbg !108 + %3 = bitcast %struct._class_t* %2 to i8*, !dbg !108 + %4 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !108 + %5 = bitcast i8* %4 to i8* (i8*, i8*)*, !dbg !108 + %6 = call i8* %5(i8* %3, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !108 + %7 = bitcast i8* %6 to %0*, !dbg !108 + %8 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !108, !invariant.load !67 + %9 = bitcast %0* %7 to i8*, !dbg !108 + %10 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %9, i8* %8), !dbg !108 + %11 = bitcast i8* %10 to %0*, !dbg !108 + store %0* %11, %0** %a, align 8, !dbg !108 + ret i32 0, !dbg !109 +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nonlazybind } +attributes #3 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!56, !57, !58, !59} + +!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC] +!1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [from ] +!5 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", metadata !"llvm/_build.ninja.Debug"} +!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m] +!7 = metadata !{metadata !8, metadata !10} +!8 = metadata !{i32 786460, null, metadata !4, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject] +!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [from ] +!10 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"ivar", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int] +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!12 = metadata !{metadata !13, metadata !27, metadata !31, metadata !35, metadata !36, metadata !39} +!13 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[A init]", metadata !"-[A init]", metadata !"", i32 46, metadata !14, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*)* @"\01-[A init]", null, null, metadata !2, i32 46} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]] +!14 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!15 = metadata !{metadata !16, metadata !23, metadata !24} +!16 = metadata !{i32 786454, metadata !5, null, metadata !"id", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ] +!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object] +!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [from ] +!19 = metadata !{metadata !20} +!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ] +!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class] +!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [fwd] [from ] +!23 = metadata !{i32 786447, i32 0, i32 0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A] +!24 = metadata !{i32 786454, metadata !5, i32 0, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ] +!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector] +!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [fwd] [from ] +!27 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__9-[A init]_block_invoke", metadata !"__9-[A init]_block_invoke", metadata !"", i32 49, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @"__9-[A init]_block_invoke", null, null, metadata !2, i32 49} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke] +!28 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!29 = metadata !{null, metadata !30} +!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!31 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 52, metadata !33, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*, i8*)* @__copy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_] +!32 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] +!33 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!34 = metadata !{null, metadata !30, metadata !30} +!35 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 52, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*)* @__destroy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_] +!36 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !37, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 60} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main] +!37 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!38 = metadata !{metadata !11} +!39 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 39, metadata !40, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (void ()*)* @run, null, null, metadata !2, i32 40} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run] +!40 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!41 = metadata !{null, metadata !42} +!42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic] +!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [from ] +!44 = metadata !{metadata !45, metadata !46, metadata !47, metadata !48, metadata !49} +!45 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ] +!46 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int] +!47 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 0, size 32, align 32, offset 96] [from int] +!48 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ] +!49 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 40, i64 64, i64 64, i64 192, i32 0, metadata !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ] +!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor] +!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [from ] +!52 = metadata !{metadata !53, metadata !55} +!53 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int] +!54 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned] +!55 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !54} ; [ DW_TAG_member ] [Size] [line 0, size 64, align 64, offset 64] [from long unsigned int] +!56 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} +!57 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} +!58 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} +!59 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} +!60 = metadata !{i32 786689, metadata !13, metadata !"self", metadata !32, i32 16777262, metadata !61, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 46] +!61 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] +!62 = metadata !{i32 46, i32 0, metadata !13, null} +!63 = metadata !{i32 786689, metadata !13, metadata !"_cmd", metadata !32, i32 33554478, metadata !64, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 46] +!64 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [from ] +!65 = metadata !{i32 48, i32 0, metadata !66, null} +!66 = metadata !{i32 786443, metadata !5, metadata !13, i32 47, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m] +!67 = metadata !{} +!68 = metadata !{i32 49, i32 0, metadata !69, null} +!69 = metadata !{i32 786443, metadata !5, metadata !66, i32 48, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m] +!70 = metadata !{i32 53, i32 0, metadata !69, null} +!71 = metadata !{i32 54, i32 0, metadata !66, null} +!72 = metadata !{i32 786689, metadata !39, metadata !"block", metadata !6, i32 16777255, metadata !42, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [block] [line 39] +!73 = metadata !{i32 39, i32 0, metadata !39, null} +!74 = metadata !{i32 41, i32 0, metadata !39, null} +!75 = metadata !{i32 42, i32 0, metadata !39, null} +!76 = metadata !{i32 786689, metadata !27, metadata !".block_descriptor", metadata !6, i32 16777265, metadata !77, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49] +!77 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1] +!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [from ] +!79 = metadata !{metadata !80, metadata !81, metadata !82, metadata !83, metadata !84, metadata !87} +!80 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 49, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ] +!81 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 49, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int] +!82 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 49, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 49, size 32, align 32, offset 96] [from int] +!83 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 49, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ] +!84 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 49, i64 64, i64 64, i64 192, i32 0, metadata !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ] +!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose] +!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [fwd] [from ] +!87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ] +!88 = metadata !{i32 49, i32 0, metadata !27, null} +!89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, i64 2, i64 1, i64 32} ; [ DW_TAG_auto_variable ] [self] [line 52] +!90 = metadata !{i32 52, i32 0, metadata !27, null} +!91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50] +!92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m] +!93 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary] +!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [from ] +!95 = metadata !{metadata !96} +!96 = metadata !{i32 786460, null, metadata !94, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary] +!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [from ] +!98 = metadata !{metadata !99} +!99 = metadata !{i32 786460, null, metadata !97, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject] +!100 = metadata !{i32 50, i32 0, metadata !92, null} +!101 = metadata !{i32 51, i32 0, metadata !92, null} +!102 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52] +!103 = metadata !{i32 52, i32 0, metadata !31, null} +!104 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 33554484, metadata !30, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [line 52] +!105 = metadata !{i32 786689, metadata !35, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52] +!106 = metadata !{i32 52, i32 0, metadata !35, null} +!107 = metadata !{i32 786688, metadata !36, metadata !"a", metadata !6, i32 61, metadata !61, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 61] +!108 = metadata !{i32 61, i32 0, metadata !36, null} +!109 = metadata !{i32 62, i32 0, metadata !36, null} diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll index c03d19addd..50a2b3fa51 100644 --- a/test/DebugInfo/X86/debug-info-static-member.ll +++ b/test/DebugInfo/X86/debug-info-static-member.ll @@ -58,36 +58,37 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"debug-info-static-member.cpp", metadata !"/home/probinson/projects/upstream/static-member/test", metadata !"clang version 3.3 (trunk 171914)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main] -!6 = metadata !{i32 786473, metadata !"/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", metadata !"/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main] +!6 = metadata !{i32 786473, metadata !33} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{metadata !12, metadata !27, metadata !28} !12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def] -!13 = metadata !{i32 786434, null, metadata !"C", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ] +!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [from ] !14 = metadata !{metadata !15, metadata !16, metadata !19, metadata !20, metadata !23, metadata !24, metadata !26} -!15 = metadata !{i32 786445, metadata !13, metadata !"a", metadata !6, i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int] -!16 = metadata !{i32 786445, metadata !13, metadata !"const_a", metadata !6, i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ] -!17 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool] -!18 = metadata !{i32 786468, null, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean] -!19 = metadata !{i32 786445, metadata !13, metadata !"b", metadata !6, i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int] -!20 = metadata !{i32 786445, metadata !13, metadata !"const_b", metadata !6, i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ] -!21 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float] -!22 = metadata !{i32 786468, null, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] -!23 = metadata !{i32 786445, metadata !13, metadata !"c", metadata !6, i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int] -!24 = metadata !{i32 786445, metadata !13, metadata !"const_c", metadata !6, i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ] -!25 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int] -!26 = metadata !{i32 786445, metadata !13, metadata !"d", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int] +!15 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"a", i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int] +!16 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_a", i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ] +!17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool] +!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean] +!19 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"b", i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int] +!20 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_b", i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ] +!21 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float] +!22 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float] +!23 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"c", i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int] +!24 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_c", i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ] +!25 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int] +!26 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"d", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int] !27 = metadata !{i32 786484, i32 0, metadata !13, metadata !"b", metadata !"b", metadata !"_ZN1C1bE", metadata !6, i32 15, metadata !9, i32 0, i32 1, i32* @_ZN1C1bE, metadata !19} ; [ DW_TAG_variable ] [b] [line 15] [def] !28 = metadata !{i32 786484, i32 0, metadata !13, metadata !"c", metadata !"c", metadata !"_ZN1C1cE", metadata !6, i32 16, metadata !9, i32 0, i32 1, i32* @_ZN1C1cE, metadata !23} ; [ DW_TAG_variable ] [c] [line 16] [def] !29 = metadata !{i32 786688, metadata !5, metadata !"instance_C", metadata !6, i32 20, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [instance_C] [line 20] !30 = metadata !{i32 20, i32 0, metadata !5, null} !31 = metadata !{i32 21, i32 0, metadata !5, null} !32 = metadata !{i32 22, i32 0, metadata !5, null} +!33 = metadata !{metadata !"/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", metadata !"/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm"} ; PRESENT verifies that static member declarations have these attributes: ; external, declaration, accessibility, and either DW_AT_MIPS_linkage_name ; (for variables) or DW_AT_const_value (for constants). diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll index f4df0b7d15..30e8c2e274 100644 --- a/test/DebugInfo/X86/elf-names.ll +++ b/test/DebugInfo/X86/elf-names.ll @@ -58,36 +58,36 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo", metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !31} -!5 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", metadata !6, i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D] -!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D] +!6 = metadata !{i32 786473, metadata !53} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D] -!10 = metadata !{i32 786434, null, metadata !"D", metadata !6, i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ] +!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [from ] !11 = metadata !{metadata !12, metadata !14, metadata !15, metadata !16, metadata !17, metadata !20} -!12 = metadata !{i32 786445, metadata !10, metadata !"c1", metadata !6, i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int] -!13 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!14 = metadata !{i32 786445, metadata !10, metadata !"c2", metadata !6, i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int] -!15 = metadata !{i32 786445, metadata !10, metadata !"c3", metadata !6, i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int] -!16 = metadata !{i32 786445, metadata !10, metadata !"c4", metadata !6, i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int] -!17 = metadata !{i32 786478, i32 0, metadata !10, metadata !"D", metadata !"D", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D] +!12 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c1", i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int] +!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!14 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c2", i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int] +!15 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c3", i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int] +!16 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c4", i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int] +!17 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D] !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] -!20 = metadata !{i32 786478, i32 0, metadata !10, metadata !"D", metadata !"D", metadata !"", metadata !6, i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D] +!20 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D] !21 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !22 = metadata !{null, metadata !9, metadata !23} !23 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ] -!24 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D] +!24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D] !25 = metadata !{metadata !26} !26 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] !27 = metadata !{metadata !28} !28 = metadata !{metadata !29} !29 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777228, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 12] -!30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D] -!31 = metadata !{i32 786478, i32 0, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", metadata !6, i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D] +!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D] +!31 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D] !32 = metadata !{metadata !33} !33 = metadata !{metadata !34, metadata !35} !34 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777235, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 19] @@ -109,3 +109,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !50 = metadata !{i32 22, i32 0, metadata !48, null} !51 = metadata !{i32 23, i32 0, metadata !48, null} !52 = metadata !{i32 24, i32 0, metadata !48, null} +!53 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo"} diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll index c3bdbc4c55..6e59915fe1 100644 --- a/test/DebugInfo/X86/empty-and-one-elem-array.ll +++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll @@ -59,33 +59,34 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"func", metadata !"func", metadata !"", metadata !6, i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func] -!6 = metadata !{i32 786473, metadata !"test.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func] +!6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12] -!11 = metadata !{i32 786443, metadata !5, i32 11, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c] -!12 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ] +!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c] +!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ] !13 = metadata !{metadata !14, metadata !15} -!14 = metadata !{i32 786445, metadata !12, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] -!15 = metadata !{i32 786445, metadata !12, metadata !"b", metadata !6, i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ] -!16 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int] +!14 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int] +!15 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ] +!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int] !17 = metadata !{metadata !18} !18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1] !19 = metadata !{i32 12, i32 0, metadata !11, null} !20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13] -!21 = metadata !{i32 786451, null, metadata !"bar", metadata !6, i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ] +!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ] !22 = metadata !{metadata !23, metadata !24} -!23 = metadata !{i32 786445, metadata !21, metadata !"a", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int] -!24 = metadata !{i32 786445, metadata !21, metadata !"b", metadata !6, i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ] -!25 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!23 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"a", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int] +!24 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"b", i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ] +!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] !26 = metadata !{metadata !27} !27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0] !28 = metadata !{i32 13, i32 0, metadata !11, null} !29 = metadata !{i32 15, i32 0, metadata !11, null} !30 = metadata !{i32 16, i32 0, metadata !11, null} !31 = metadata !{i32 17, i32 0, metadata !11, null} +!32 = metadata !{metadata !"test.c", metadata !"/Volumes/Sandbox/llvm"} diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll index e4ec4bc9cf..ace115610e 100644 --- a/test/DebugInfo/X86/empty-array.ll +++ b/test/DebugInfo/X86/empty-array.ll @@ -24,21 +24,22 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def] -!6 = metadata !{i32 786473, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ] +!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ] !8 = metadata !{metadata !9, metadata !14} -!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ] -!10 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] -!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ] +!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{metadata !13} !13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound] -!14 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A] +!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A] !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{null, metadata !17} !17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] +!20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"} diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll index 3813e1f3fc..6de15f6404 100644 --- a/test/DebugInfo/X86/ending-run.ll +++ b/test/DebugInfo/X86/ending-run.ll @@ -28,11 +28,11 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !19, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -41,7 +41,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !13 = metadata !{i32 5, i32 5, metadata !5, null} !14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 786443, metadata !5, i32 7, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!15 = metadata !{i32 786443, metadata !19, metadata !5, i32 7, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !16 = metadata !{i32 8, i32 9, metadata !15, null} !17 = metadata !{i32 8, i32 18, metadata !15, null} !18 = metadata !{i32 9, i32 5, metadata !15, null} +!19 = metadata !{metadata !"ending-run.c", metadata !"/Users/echristo/tmp"} diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll index e728a1e607..22728116d9 100644 --- a/test/DebugInfo/X86/enum-class.ll +++ b/test/DebugInfo/X86/enum-class.ll @@ -7,18 +7,18 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !3, metadata !8, metadata !12} -!3 = metadata !{i32 786436, null, metadata !"A", metadata !4, i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!4 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] -!5 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{metadata !7} !7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ] -!8 = metadata !{i32 786436, null, metadata !"B", metadata !4, i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] -!9 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!9 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} !11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ] -!12 = metadata !{i32 786436, null, metadata !"C", metadata !4, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] !13 = metadata !{metadata !14} !14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ] !15 = metadata !{i32 0} @@ -26,6 +26,7 @@ !19 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !4, i32 4, metadata !3, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ] !20 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !4, i32 5, metadata !8, i32 0, i32 1, i64* @b, null} ; [ DW_TAG_variable ] !21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ] +!22 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} ; CHECK: DW_TAG_enumeration_type [3] ; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x0026 => {0x00000026}) diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll index 89bcd5ee7f..33d807e305 100644 --- a/test/DebugInfo/X86/enum-fwd-decl.ll +++ b/test/DebugInfo/X86/enum-fwd-decl.ll @@ -5,12 +5,12 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/tmp", metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def] !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/tmp", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786436, null, metadata !"E", metadata !6, i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ] +!7 = metadata !{i32 786436, metadata !6, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [fwd] [from ] ; CHECK: DW_TAG_enumeration_type ; CHECK-NEXT: DW_AT_name diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll index 7ed712a9c4..bfe2d17e20 100644 --- a/test/DebugInfo/X86/fission-cu.ll +++ b/test/DebugInfo/X86/fission-cu.ll @@ -5,12 +5,13 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def] -!6 = metadata !{i32 786473, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!8 = metadata !{metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp"} ; Check that the skeleton compile unit contains the proper attributes: ; This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll index 92dd0724a8..0c0a7ab51d 100644 --- a/test/DebugInfo/X86/line-info.ll +++ b/test/DebugInfo/X86/line-info.ll @@ -1,7 +1,8 @@ ; RUN: llc -mtriple=x86_64-apple-darwin -filetype=obj -O0 < %s > %t ; RUN: llvm-dwarfdump %t | FileCheck %s -; CHECK: 2 0 1 0 is_stmt +; CHECK: [[FILEID:[0-9]+]]]{{.*}}list0.h +; CHECK: [[FILEID]] 0 1 0 is_stmt{{$}} ; IR generated from clang -g -emit-llvm with the following source: ; list0.h: @@ -13,7 +14,7 @@ ; int main() { ; } -define i32 @foo(i32 %x) nounwind uwtable { +define i32 @foo(i32 %x) #0 { entry: %x.addr = alloca i32, align 4 store i32 %x, i32* %x.addr, align 4 @@ -24,29 +25,34 @@ entry: ret i32 %inc, !dbg !16 } -declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata) #1 -define i32 @main() nounwind uwtable { +define i32 @main() #0 { entry: ret i32 0, !dbg !17 } +attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", metadata !"clang version 3.3 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr14566/list0.c] [DW_LANG_C99] -!1 = metadata !{i32 0} -!3 = metadata !{metadata !5, metadata !10} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] -!6 = metadata !{i32 786473, metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99] +!1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !10} +!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!5 = metadata !{metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch"} +!6 = metadata !{i32 786473, metadata !5} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h] +!7 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!10 = metadata !{i32 786478, i32 0, metadata !11, metadata !"main", metadata !"main", metadata !"", metadata !11, i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main] -!11 = metadata !{i32 786473, metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch/pr14566", null} ; [ DW_TAG_file_type ] -!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main] +!11 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] +!12 = metadata !{i32 786453, i32 0, i32 0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{metadata !9} -!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1] -!15 = metadata !{i32 1, i32 0, metadata !5, null} -!16 = metadata !{i32 2, i32 0, metadata !5, null} +!14 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1] +!15 = metadata !{i32 1, i32 0, metadata !4, null} +!16 = metadata !{i32 2, i32 0, metadata !4, null} !17 = metadata !{i32 3, i32 0, metadata !18, null} -!18 = metadata !{i32 786443, metadata !10, metadata !11} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/pr14566/list0.c] +!18 = metadata !{i32 786443, metadata !11, metadata !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll index b1d8645086..9440f3a994 100644 --- a/test/DebugInfo/X86/linkage-name.ll +++ b/test/DebugInfo/X86/linkage-name.ll @@ -26,18 +26,18 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, metadata !16, i32 5} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9, metadata !10, metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ] -!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] +!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ] !12 = metadata !{metadata !13} -!13 = metadata !{i32 786478, i32 0, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ] +!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ] !14 = metadata !{metadata !15} !15 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !16 = metadata !{metadata !17} @@ -45,9 +45,10 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !18 = metadata !{metadata !20} !20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] !21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ] -!22 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] !23 = metadata !{i32 5, i32 8, metadata !5, null} !24 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554437, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !25 = metadata !{i32 5, i32 14, metadata !5, null} !26 = metadata !{i32 6, i32 4, metadata !27, null} -!27 = metadata !{i32 786443, metadata !5, i32 5, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!27 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 17, i32 0} ; [ DW_TAG_lexical_block ] +!28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"} diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll index 2240f362f7..4dd5aafe18 100644 --- a/test/DebugInfo/X86/low-pc-cu.ll +++ b/test/DebugInfo/X86/low-pc-cu.ll @@ -14,16 +14,16 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !12} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"q", metadata !"q", metadata !"_Z1qv", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786478, metadata !"_Z1qv", i32 0, metadata !6, metadata !"q", metadata !"q", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{metadata !11} !11 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"t", metadata !"t", metadata !"", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ] +!12 = metadata !{i32 786478, metadata !"", i32 0, metadata !6, metadata !"t", metadata !"t", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ] !13 = metadata !{i32 7, i32 1, metadata !14, null} !14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll index 6c1032e1ff..0980e23b75 100644 --- a/test/DebugInfo/X86/misched-dbg-value.ll +++ b/test/DebugInfo/X86/misched-dbg-value.ll @@ -89,10 +89,10 @@ attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2", metadata !"clang version 3.3 (trunk 175015)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, i32 12, metadata !3, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99] !1 = metadata !{metadata !2} -!2 = metadata !{i32 786436, null, metadata !"", metadata !3, i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ] -!3 = metadata !{i32 786473, metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2", null} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [from ] +!3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ] !4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9} !5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0] !6 = metadata !{i32 786472, metadata !"Ident2", i64 10000} ; [ DW_TAG_enumerator ] [Ident2 :: 10000] @@ -101,16 +101,16 @@ attributes #1 = { nounwind readnone } !9 = metadata !{i32 786472, metadata !"Ident5", i64 10003} ; [ DW_TAG_enumerator ] [Ident5 :: 10003] !10 = metadata !{i32 0} !11 = metadata !{metadata !12} -!12 = metadata !{i32 786478, i32 0, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8] +!12 = metadata !{i32 786478, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", metadata !3, i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8] !13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21} -!15 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] -!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!17 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] -!18 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int] +!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int] +!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ] +!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int] !19 = metadata !{metadata !20} !20 = metadata !{i32 786465, i64 0, i64 51} ; [ DW_TAG_subrange_type ] [0, 50] -!21 = metadata !{i32 786454, null, metadata !"OneToFifty", metadata !3, i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int] +!21 = metadata !{i32 786454, metadata !82, null, metadata !"OneToFifty", i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int] !22 = metadata !{metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28} !23 = metadata !{i32 786689, metadata !12, metadata !"Array1Par", metadata !3, i32 16777397, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array1Par] [line 181] !24 = metadata !{i32 786689, metadata !12, metadata !"Array2Par", metadata !3, i32 33554614, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array2Par] [line 182] @@ -120,36 +120,36 @@ attributes #1 = { nounwind readnone } !28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187] !29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63} !30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def] -!31 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char] -!32 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] +!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char] +!32 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char] !33 = metadata !{metadata !34} !34 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3] !35 = metadata !{i32 786484, i32 0, null, metadata !"IntGlob", metadata !"IntGlob", metadata !"", metadata !3, i32 171, metadata !16, i32 0, i32 1, i32* @IntGlob, null} ; [ DW_TAG_variable ] [IntGlob] [line 171] [def] !36 = metadata !{i32 786484, i32 0, null, metadata !"BoolGlob", metadata !"BoolGlob", metadata !"", metadata !3, i32 172, metadata !37, i32 0, i32 1, i32* @BoolGlob, null} ; [ DW_TAG_variable ] [BoolGlob] [line 172] [def] -!37 = metadata !{i32 786454, null, metadata !"boolean", metadata !3, i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int] +!37 = metadata !{i32 786454, metadata !82, null, metadata !"boolean", i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int] !38 = metadata !{i32 786484, i32 0, null, metadata !"Char1Glob", metadata !"Char1Glob", metadata !"", metadata !3, i32 173, metadata !32, i32 0, i32 1, i8* @Char1Glob, null} ; [ DW_TAG_variable ] [Char1Glob] [line 173] [def] !39 = metadata !{i32 786484, i32 0, null, metadata !"Char2Glob", metadata !"Char2Glob", metadata !"", metadata !3, i32 174, metadata !32, i32 0, i32 1, i8* @Char2Glob, null} ; [ DW_TAG_variable ] [Char2Glob] [line 174] [def] !40 = metadata !{i32 786484, i32 0, null, metadata !"Array1Glob", metadata !"Array1Glob", metadata !"", metadata !3, i32 175, metadata !41, i32 0, i32 1, [51 x i32]* @Array1Glob, null} ; [ DW_TAG_variable ] [Array1Glob] [line 175] [def] -!41 = metadata !{i32 786454, null, metadata !"Array1Dim", metadata !3, i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ] +!41 = metadata !{i32 786454, metadata !82, null, metadata !"Array1Dim", i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ] !42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def] -!43 = metadata !{i32 786454, null, metadata !"Array2Dim", metadata !3, i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ] -!44 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int] +!43 = metadata !{i32 786454, metadata !82, null, metadata !"Array2Dim", i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ] +!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int] !45 = metadata !{metadata !20, metadata !20} !46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def] -!47 = metadata !{i32 786454, null, metadata !"RecordPtr", metadata !3, i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ] -!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType] -!49 = metadata !{i32 786454, null, metadata !"RecordType", metadata !3, i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record] -!50 = metadata !{i32 786451, null, metadata !"Record", metadata !3, i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ] +!47 = metadata !{i32 786454, metadata !82, null, metadata !"RecordPtr", i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ] +!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType] +!49 = metadata !{i32 786454, metadata !82, null, metadata !"RecordType", i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record] +!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [from ] !51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58} -!52 = metadata !{i32 786445, metadata !50, metadata !"PtrComp", metadata !3, i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ] -!53 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record] -!54 = metadata !{i32 786445, metadata !50, metadata !"Discr", metadata !3, i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration] -!55 = metadata !{i32 786454, null, metadata !"Enumeration", metadata !3, i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ] -!56 = metadata !{i32 786445, metadata !50, metadata !"EnumComp", metadata !3, i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration] -!57 = metadata !{i32 786445, metadata !50, metadata !"IntComp", metadata !3, i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty] -!58 = metadata !{i32 786445, metadata !50, metadata !"StringComp", metadata !3, i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30] -!59 = metadata !{i32 786454, null, metadata !"String30", metadata !3, i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ] -!60 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char] +!52 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"PtrComp", i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ] +!53 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record] +!54 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"Discr", i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration] +!55 = metadata !{i32 786454, metadata !82, null, metadata !"Enumeration", i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ] +!56 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"EnumComp", i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration] +!57 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"IntComp", i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty] +!58 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"StringComp", i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30] +!59 = metadata !{i32 786454, metadata !82, null, metadata !"String30", i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ] +!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char] !61 = metadata !{metadata !62} !62 = metadata !{i32 786465, i64 0, i64 31} ; [ DW_TAG_subrange_type ] [0, 30] !63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def] @@ -171,3 +171,4 @@ attributes #1 = { nounwind readnone } !79 = metadata !{i32 196, i32 0, metadata !12, null} !80 = metadata !{i32 197, i32 0, metadata !12, null} !81 = metadata !{i32 198, i32 0, metadata !12, null} +!82 = metadata !{metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2"} diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll index 5f3e0d9b67..f6ca10bcc4 100644 --- a/test/DebugInfo/X86/multiple-at-const-val.ll +++ b/test/DebugInfo/X86/multiple-at-const-val.ll @@ -31,29 +31,31 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"student2.cpp", metadata !"/privite/tmp", metadata !"clang version 3.3 (trunk 174207)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, i32 4, metadata !961, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{metadata !26} !4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ] -!5 = metadata !{i32 786473, metadata !"os_base.h", metadata !"/privite/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ] !25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ] -!26 = metadata !{i32 786436, metadata !4, metadata !"_Ios_Iostate", metadata !5, i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] +!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, i32 0} ; [ DW_TAG_enumeration_type ] !27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32} !28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0] !29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1] !30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2] !31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4] !32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536] -!49 = metadata !{i32 786434, metadata !4, metadata !"os_base", metadata !5, i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ] +!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null} ; [ DW_TAG_class_type ] !50 = metadata !{metadata !77} !54 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !55 = metadata !{metadata !56} -!56 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!77 = metadata !{i32 786445, metadata !49, metadata !"badbit", metadata !5, i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ] -!78 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ] +!56 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!77 = metadata !{i32 786445, metadata !1801, metadata !49, metadata !"badbit", i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ] +!78 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ] !79 = metadata !{i32 786454, metadata !49, metadata !"ostate", metadata !5, i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ] !955 = metadata !{i32 0} !956 = metadata !{metadata !960} !960 = metadata !{i32 786478, i32 0, metadata !961, metadata !"main", metadata !"main", metadata !"", metadata !961, i32 73, metadata !54, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !955, i32 73} ; [ DW_TAG_subprogram ] -!961 = metadata !{i32 786473, metadata !"student2.cpp", metadata !"/privite/tmp", null} ; [ DW_TAG_file_type ] +!961 = metadata !{i32 786473, metadata !1802} ; [ DW_TAG_file_type ] !1786 = metadata !{metadata !1800} !1800 = metadata !{i32 786484, i32 0, metadata !5, metadata !"badbit", metadata !"badbit", metadata !"badbit", metadata !5, i32 331, metadata !78, i32 1, i32 1, i32 1, metadata !77} ; [ DW_TAG_variable ] +!1801 = metadata !{metadata !"os_base.h", metadata !"/privite/tmp"} +!1802 = metadata !{metadata !"student2.cpp", metadata !"/privite/tmp"} diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll index 985a7f830b..33a6f8ba9e 100644 --- a/test/DebugInfo/X86/nondefault-subrange-array.ll +++ b/test/DebugInfo/X86/nondefault-subrange-array.ll @@ -27,21 +27,22 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def] -!6 = metadata !{i32 786473, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ] +!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ] !8 = metadata !{metadata !9, metadata !14} -!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ] -!10 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] -!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ] +!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int] +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !12 = metadata !{metadata !13} !13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 39] -!14 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A] +!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A] !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{null, metadata !17} !17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A] !18 = metadata !{metadata !19} !19 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] +!20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"} diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll index 8869dfb89f..1847d2c10f 100644 --- a/test/DebugInfo/X86/objc-fwd-decl.ll +++ b/test/DebugInfo/X86/objc-fwd-decl.ll @@ -12,14 +12,15 @@ !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10, !11, !12} -!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ] -!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] -!8 = metadata !{i32 786451, null, metadata !"FooBarBaz", metadata !6, i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] +!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ] !9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} !10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} !11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} !12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} +!13 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"} diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll index 13efe21297..3bb93e7251 100644 --- a/test/DebugInfo/X86/op_deref.ll +++ b/test/DebugInfo/X86/op_deref.ll @@ -59,29 +59,30 @@ declare void @llvm.stackrestore(i8*) nounwind !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null, metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !11 = metadata !{i32 1, i32 26, metadata !5, null} !12 = metadata !{i32 3, i32 13, metadata !13, null} -!13 = metadata !{i32 786443, metadata !5, i32 2, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!13 = metadata !{i32 786443, metadata !6, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ] +!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ] !16 = metadata !{metadata !17} !17 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] !18 = metadata !{i32 3, i32 7, metadata !13, null} !19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] !20 = metadata !{i32 4, i32 7, metadata !13, null} !21 = metadata !{i32 5, i32 8, metadata !22, null} -!22 = metadata !{i32 786443, metadata !13, i32 5, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 786443, metadata !6, metadata !13, i32 5, i32 3, i32 1} ; [ DW_TAG_lexical_block ] !23 = metadata !{i32 6, i32 5, metadata !24, null} -!24 = metadata !{i32 786443, metadata !22, i32 5, i32 27, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 786443, metadata !6, metadata !22, i32 5, i32 27, i32 2} ; [ DW_TAG_lexical_block ] !25 = metadata !{i32 7, i32 3, metadata !24, null} !26 = metadata !{i32 5, i32 22, metadata !22, null} !27 = metadata !{i32 8, i32 1, metadata !13, null} +!28 = metadata !{metadata !"bar.c", metadata !"/Users/echristo/tmp"} diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll index 4db84e95d2..aa560587a6 100644 --- a/test/DebugInfo/X86/pointer-type-size.ll +++ b/test/DebugInfo/X86/pointer-type-size.ll @@ -10,14 +10,15 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 147882)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ] -!6 = metadata !{i32 720937, metadata !"foo.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786451, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 720937, metadata !13} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786445, metadata !7, metadata !"ptr", metadata !6, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ] -!11 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] -!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786445, metadata !13, metadata !7, metadata !"ptr", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ] +!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] +!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!13 = metadata !{metadata !"foo.c", metadata !"/Users/echristo/tmp"} diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll index 63cfc8179a..61df4ad0ba 100644 --- a/test/DebugInfo/X86/pr11300.ll +++ b/test/DebugInfo/X86/pr11300.ll @@ -31,17 +31,17 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !20} -!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 720937, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18, i32 4} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ] !7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null, metadata !9} -!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] -!10 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] +!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ] !11 = metadata !{metadata !12} -!12 = metadata !{i32 720942, i32 0, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ] +!12 = metadata !{i32 720942, metadata !6, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ] !13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !14 = metadata !{null, metadata !15} !15 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ] @@ -49,15 +49,16 @@ entry: !17 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !18 = metadata !{metadata !19} !19 = metadata !{i32 720932} ; [ DW_TAG_base_type ] -!20 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ] +!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21, i32 2} ; [ DW_TAG_subprogram ] !21 = metadata !{metadata !22} !22 = metadata !{i32 720932} ; [ DW_TAG_base_type ] !23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !24 = metadata !{i32 4, i32 15, metadata !5, null} !25 = metadata !{i32 4, i32 20, metadata !26, null} -!26 = metadata !{i32 786443, metadata !5, i32 4, i32 18, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 18, i32 0} ; [ DW_TAG_lexical_block ] !27 = metadata !{i32 4, i32 30, metadata !26, null} !28 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ] !29 = metadata !{i32 2, i32 8, metadata !20, null} !30 = metadata !{i32 2, i32 15, metadata !31, null} -!31 = metadata !{i32 786443, metadata !20, i32 2, i32 14, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] +!31 = metadata !{i32 786443, metadata !6, metadata !20, i32 2, i32 14, i32 1} ; [ DW_TAG_lexical_block ] +!32 = metadata !{metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build"} diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll index 3970583cbe..295c018c5e 100644 --- a/test/DebugInfo/X86/pr12831.ll +++ b/test/DebugInfo/X86/pr12831.ll @@ -77,10 +77,10 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", metadata !"clang version 3.2 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !159, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127} -!5 = metadata !{i32 786478, i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ] +!5 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, null, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ] !6 = metadata !{i32 786473, metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null, metadata !9} @@ -91,32 +91,32 @@ entry: !13 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] !14 = metadata !{i32 786434, null, metadata !"BPLModuleWriter", metadata !6, i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null} ; [ DW_TAG_class_type ] !15 = metadata !{metadata !16} -!16 = metadata !{i32 786478, i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ] +!16 = metadata !{i32 786478, metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 0, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !6, i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ] !17 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !18 = metadata !{null, metadata !19, metadata !20} !19 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ] !20 = metadata !{i32 786434, null, metadata !"function<void ()>", metadata !6, i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97} ; [ DW_TAG_class_type ] !21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92} -!22 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ] +!22 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ] !23 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !24 = metadata !{null, metadata !25, metadata !26} !25 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ] !26 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null} ; [ DW_TAG_class_type ] !27 = metadata !{metadata !28, metadata !35, metadata !41} -!28 = metadata !{i32 786478, i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ] +!28 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"operator()", metadata !"operator()", metadata !6, i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ] !29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !30 = metadata !{null, metadata !31} !31 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ] !32 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ] !33 = metadata !{metadata !34} !34 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!35 = metadata !{i32 786478, i32 0, metadata !26, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ] +!35 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"~", metadata !"~", metadata !6, i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ] !36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !37 = metadata !{null, metadata !38} !38 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ] !39 = metadata !{metadata !40} !40 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!41 = metadata !{i32 786478, i32 0, metadata !26, metadata !"", metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ] +!41 = metadata !{i32 786478, metadata !"", i32 0, metadata !26, metadata !"", metadata !"", metadata !6, i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ] !42 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !43 = metadata !{null, metadata !38, metadata !44} !44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ] @@ -126,32 +126,32 @@ entry: !48 = metadata !{i32 786479, null, metadata !"_Functor", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] !49 = metadata !{metadata !50} !50 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!51 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ] +!51 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !6, i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ] !52 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !53 = metadata !{null, metadata !25, metadata !20} !54 = metadata !{metadata !55} !55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] !56 = metadata !{metadata !57} !57 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!58 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ] +!58 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ] !59 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !60 = metadata !{null, metadata !25, metadata !61} !61 = metadata !{i32 786434, metadata !5, metadata !"", metadata !6, i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null} ; [ DW_TAG_class_type ] !62 = metadata !{metadata !63, metadata !70, metadata !76} -!63 = metadata !{i32 786478, i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ] +!63 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"operator()", metadata !"operator()", metadata !6, i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ] !64 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !65 = metadata !{null, metadata !66} !66 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ] !67 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ] !68 = metadata !{metadata !69} !69 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!70 = metadata !{i32 786478, i32 0, metadata !61, metadata !"~", metadata !"~", metadata !"", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ] +!70 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"~", metadata !"~", metadata !6, i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ] !71 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !72 = metadata !{null, metadata !73} !73 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ] !74 = metadata !{metadata !75} !75 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!76 = metadata !{i32 786478, i32 0, metadata !61, metadata !"", metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ] +!76 = metadata !{i32 786478, metadata !"", i32 0, metadata !61, metadata !"", metadata !"", metadata !6, i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ] !77 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !78 = metadata !{null, metadata !73, metadata !79} !79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ] @@ -161,13 +161,13 @@ entry: !83 = metadata !{i32 786479, null, metadata !"_Functor", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] !84 = metadata !{metadata !85} !85 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!86 = metadata !{i32 786478, i32 0, metadata !20, metadata !"function", metadata !"function", metadata !"", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ] +!86 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"function", metadata !"function", metadata !6, i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ] !87 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !88 = metadata !{null, metadata !25, metadata !89} !89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ] !90 = metadata !{metadata !91} !91 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!92 = metadata !{i32 786478, i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !"", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ] +!92 = metadata !{i32 786478, metadata !"", i32 0, metadata !20, metadata !"~function", metadata !"~function", metadata !6, i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ] !93 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !94 = metadata !{null, metadata !25} !95 = metadata !{metadata !96} @@ -178,20 +178,20 @@ entry: !100 = metadata !{null} !101 = metadata !{metadata !102} !102 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!103 = metadata !{i32 786478, i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ] +!103 = metadata !{i32 786478, metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 0, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !6, i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ] !104 = metadata !{metadata !105} !105 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!106 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ] -!107 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ] +!106 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ] +!107 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ] !108 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !109 = metadata !{null, metadata !110} !110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ] !111 = metadata !{metadata !112} !112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ] -!113 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ] +!113 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !6, i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ] !114 = metadata !{i32 786434, null, metadata !"_Base_manager", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null} ; [ DW_TAG_class_type ] !115 = metadata !{metadata !116, metadata !113} -!116 = metadata !{i32 786478, i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ] +!116 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ] !117 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !118 = metadata !{null, metadata !119} !119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ] @@ -201,8 +201,8 @@ entry: !123 = metadata !{i32 786468} ; [ DW_TAG_base_type ] !124 = metadata !{metadata !125} !125 = metadata !{i32 786468} ; [ DW_TAG_base_type ] -!126 = metadata !{i32 786478, i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ] -!127 = metadata !{i32 786478, i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ] +!126 = metadata !{i32 786478, metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", i32 0, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ] +!127 = metadata !{i32 786478, metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 0, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !6, i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ] !128 = metadata !{metadata !130} !130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true} ; [ DW_TAG_variable ] !131 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ] @@ -233,3 +233,4 @@ entry: !156 = metadata !{i32 10, i32 13, metadata !155, null} !157 = metadata !{i32 4, i32 5, metadata !158, null} !158 = metadata !{i32 786443, metadata !127, i32 3, i32 105, metadata !6, i32 4} ; [ DW_TAG_lexical_block ] +!159 = metadata !{i32 786473, metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta", null} ; [ DW_TAG_file_type ] diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll index a72bc9b9f5..34956237ae 100644 --- a/test/DebugInfo/X86/pr13303.ll +++ b/test/DebugInfo/X86/pr13303.ll @@ -14,13 +14,13 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"PR13303.c", metadata !"/home/probinson", metadata !"clang version 3.2 (trunk 160143)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] +!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main] !6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 1, i32 14, metadata !11, null} -!11 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c] +!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c] diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll index 0651e593b0..6e4917747c 100644 --- a/test/DebugInfo/X86/prologue-stack.ll +++ b/test/DebugInfo/X86/prologue-stack.ll @@ -20,14 +20,14 @@ declare i32 @callme(i32) !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2] +!5 = metadata !{i32 786478, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2] !6 = metadata !{i32 786473, metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 5, i32 3, metadata !11, null} -!11 = metadata !{i32 786443, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c] +!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c] !12 = metadata !{i32 6, i32 3, metadata !11, null} diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll index 136db0e428..ae2e3d4578 100644 --- a/test/DebugInfo/X86/rvalue-ref.ll +++ b/test/DebugInfo/X86/rvalue-ref.ll @@ -22,11 +22,11 @@ declare i32 @printf(i8*, ...) !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null, metadata !9} !9 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_rvalue_reference_type ] @@ -34,5 +34,6 @@ declare i32 @printf(i8*, ...) !11 = metadata !{i32 786689, metadata !5, metadata !"i", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] !12 = metadata !{i32 4, i32 17, metadata !5, null} !13 = metadata !{i32 6, i32 3, metadata !14, null} -!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!14 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ] !15 = metadata !{i32 7, i32 1, metadata !14, null} +!16 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"} diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll index 814f86c335..39a026c354 100644 --- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll +++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll @@ -42,24 +42,26 @@ entry: } !llvm.dbg.cu = !{!0, !10} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple.c", metadata !"/private/tmp", metadata !"clang version 3.3", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{metadata !2} !2 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test] -!6 = metadata !{i32 786473, metadata !"simple.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !23, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test] +!6 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!10 = metadata !{i32 786449, i32 0, i32 12, metadata !"simple2.c", metadata !"/private/tmp", metadata !"clang version 3.3 (trunk 172862)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1} ; [ DW_TAG_compile_unit ] +!10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !11 = metadata !{metadata !13} -!13 = metadata !{i32 786478, i32 0, metadata !14, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn] -!14 = metadata !{i32 786473, metadata !"simple2.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ] +!13 = metadata !{i32 786478, metadata !24, metadata !"fn", metadata !"fn", metadata !"", metadata !14, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn] +!14 = metadata !{i32 786473, metadata !24} ; [ DW_TAG_file_type ] !15 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777218, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 2] !16 = metadata !{i32 2, i32 0, metadata !5, null} !17 = metadata !{i32 4, i32 0, metadata !18, null} -!18 = metadata !{i32 786443, metadata !5, i32 3, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] +!18 = metadata !{i32 786443, metadata !23, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] !19 = metadata !{i32 786689, metadata !13, metadata !"a", metadata !14, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1] !20 = metadata !{i32 1, i32 0, metadata !13, null} !21 = metadata !{i32 2, i32 0, metadata !22, null} -!22 = metadata !{i32 786443, metadata !13, i32 1, i32 0, metadata !14, i32 0} ; [ DW_TAG_lexical_block ] +!22 = metadata !{i32 786443, metadata !24, metadata !13, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] +!23 = metadata !{metadata !"simple.c", metadata !"/private/tmp"} +!24 = metadata !{metadata !"simple2.c", metadata !"/private/tmp"} diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll index 384cc2ee24..8df281d08e 100644 --- a/test/DebugInfo/X86/stringpool.ll +++ b/test/DebugInfo/X86/stringpool.ll @@ -5,12 +5,13 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"z.c", metadata !"/home/nicholas", metadata !"clang version 3.1 (trunk 143009)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy, null} ; [ DW_TAG_variable ] -!6 = metadata !{i32 720937, metadata !"z.c", metadata !"/home/nicholas", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!8 = metadata !{metadata !"z.c", metadata !"/home/nicholas"} ; Verify that we refer to 'yyyy' with a relocation. ; LINUX: .long .Linfo_string3 # DW_AT_name diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll index 5a85a37e43..bdf104f07e 100644 --- a/test/DebugInfo/X86/struct-loc.ll +++ b/test/DebugInfo/X86/struct-loc.ll @@ -13,12 +13,13 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] -!6 = metadata !{i32 786473, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] +!6 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] -!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 786445, metadata !11, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ] +!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!11 = metadata !{metadata !"struct_bug.c", metadata !"/Users/echristo/tmp"} diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll index e9d3e77061..efc5bf0417 100644 --- a/test/DebugInfo/X86/subrange-type.ll +++ b/test/DebugInfo/X86/subrange-type.ll @@ -20,18 +20,19 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main] -!6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main] +!6 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{i32 786688, metadata !11, metadata !"i", metadata !6, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 4] -!11 = metadata !{i32 786443, metadata !5, i32 3, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c] -!12 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int] +!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c] +!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int] !13 = metadata !{metadata !14} !14 = metadata !{i32 786465, i64 0, i64 2} ; [ DW_TAG_subrange_type ] [0, 1] !15 = metadata !{i32 4, i32 0, metadata !11, null} !16 = metadata !{i32 6, i32 0, metadata !11, null} +!17 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"} diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll index efef1a1c35..027589b3d9 100644 --- a/test/DebugInfo/X86/subreg.ll +++ b/test/DebugInfo/X86/subreg.ll @@ -20,11 +20,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !9 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ] !2 = metadata !{i32 786473, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 12, metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !9, null} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 786449, i32 12, metadata !2, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, null, null, metadata !9, null, metadata !""} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !5 = metadata !{null} !6 = metadata !{i32 786468, metadata !3, metadata !"short", null, i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !7 = metadata !{i32 4, i32 22, metadata !8, null} -!8 = metadata !{i32 786443, metadata !1, i32 3, i32 19, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!8 = metadata !{i32 786443, metadata !2, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ] diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll index c5ca705ec7..570adf9e43 100644 --- a/test/DebugInfo/X86/vector.ll +++ b/test/DebugInfo/X86/vector.ll @@ -11,16 +11,17 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo", metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99] +!0 = metadata !{i32 786449, i32 12, metadata !6, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} !5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def] -!6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786454, null, metadata !"v4si", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ] -!8 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int] -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786454, metadata !12, null, metadata !"v4si", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ] +!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] !10 = metadata !{metadata !11} !11 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3] +!12 = metadata !{metadata !"foo.c", metadata !"/Users/echristo"} ; Check that we get an array type with a vector attribute. ; CHECK: DW_TAG_array_type diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll index cb599f0409..3077110491 100644 --- a/test/DebugInfo/array.ll +++ b/test/DebugInfo/array.ll @@ -13,16 +13,15 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!2} -!13 = metadata !{metadata !0} -!0 = metadata !{i32 786478, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 786473, metadata !"array.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 786449, i32 0, i32 12, metadata !"array.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129138)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !13, null, null} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786478, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, null, null, metadata !13, null, null} ; [ DW_TAG_compile_unit ] !3 = metadata !{i32 786453, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] !6 = metadata !{i32 786688, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ] -!7 = metadata !{i32 786443, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!7 = metadata !{i32 786443, metadata !1, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ] !8 = metadata !{i32 786433, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ] !9 = metadata !{metadata !10} ;CHECK: DW_TAG_subrange_type @@ -33,3 +32,5 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !10 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] !11 = metadata !{i32 4, i32 7, metadata !7, null} !12 = metadata !{i32 5, i32 3, metadata !7, null} +!13 = metadata !{metadata !0} +!14 = metadata !{metadata !"array.c", metadata !"/private/tmp"} diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll index 4ef4197002..52b2397714 100644 --- a/test/DebugInfo/dwarf-public-names.ll +++ b/test/DebugInfo/dwarf-public-names.ll @@ -1,6 +1,6 @@ ; RUN: llc -generate-dwarf-pubnames -filetype=obj -o %t.o < %s ; RUN: llvm-dwarfdump -debug-dump=pubnames %t.o | FileCheck %s -; +; XFAIL: hexagon ; ModuleID = 'dwarf-public-names.cpp' ; ; Generated from: @@ -85,27 +85,27 @@ attributes #1 = { nounwind readnone } !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t", metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, i32 4, metadata !4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20} -!3 = metadata !{i32 786478, i32 0, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", metadata !4, i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function] -!4 = metadata !{i32 786473, metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function] +!4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ] !5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !6 = metadata !{null, metadata !7} !7 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C] -!8 = metadata !{i32 786451, null, metadata !"C", metadata !4, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ] +!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [from ] !9 = metadata !{metadata !10, metadata !12, metadata !14} -!10 = metadata !{i32 786445, metadata !8, metadata !"static_member_variable", metadata !4, i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int] -!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!12 = metadata !{i32 786478, i32 0, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", metadata !4, i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function] +!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int] +!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function] !13 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] -!14 = metadata !{i32 786478, i32 0, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", metadata !4, i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function] +!14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function] !15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !16 = metadata !{metadata !11} !17 = metadata !{i32 786468} ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0] -!18 = metadata !{i32 786478, i32 0, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", metadata !4, i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function] -!19 = metadata !{i32 786478, i32 0, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", metadata !4, i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function] -!20 = metadata !{i32 786478, i32 0, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", metadata !4, i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function] +!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function] +!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function] +!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function] !21 = metadata !{i32 786489, null, metadata !"ns", metadata !4, i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] !22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !23 = metadata !{null} @@ -114,7 +114,7 @@ attributes #1 = { nounwind readnone } !26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def] !27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def] !28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9] -!29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C] +!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C] !30 = metadata !{i32 9, i32 0, metadata !3, null} !31 = metadata !{i32 10, i32 0, metadata !3, null} !32 = metadata !{i32 11, i32 0, metadata !3, null} @@ -122,3 +122,4 @@ attributes #1 = { nounwind readnone } !34 = metadata !{i32 20, i32 0, metadata !19, null} !35 = metadata !{i32 25, i32 0, metadata !20, null} !36 = metadata !{i32 26, i32 0, metadata !20, null} +!37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"} diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll index b25f3faace..f302294031 100644 --- a/test/DebugInfo/inlined-vars.ll +++ b/test/DebugInfo/inlined-vars.ll @@ -17,16 +17,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", metadata !"clang version 3.2 (trunk 159419)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !10} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202", null} ; [ DW_TAG_file_type ] -!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !1, i32 10} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{metadata !9} -!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!10 = metadata !{i32 786478, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ] -!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!10 = metadata !{i32 786478, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", metadata !6, i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !12 = metadata !{metadata !9, metadata !9} !13 = metadata !{metadata !14} !14 = metadata !{metadata !15, metadata !16} @@ -51,3 +51,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !23 = metadata !{i32 4, i32 16, metadata !10, metadata !19} !24 = metadata !{i32 5, i32 3, metadata !10, metadata !19} !25 = metadata !{i32 6, i32 3, metadata !10, metadata !19} +!26 = metadata !{metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"} diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll index 1dbadf2781..4b77189563 100644 --- a/test/DebugInfo/member-pointers.ll +++ b/test/DebugInfo/member-pointers.ll @@ -8,6 +8,7 @@ ; CHECK: DW_TAG_ptr_to_member_type ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + {{.*}} => {[[TYPE]]}) ; IR generated from clang -g with the following source: +; XFAIL: hexagon ; struct S { ; }; ; @@ -19,16 +20,17 @@ !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch", metadata !"clang version 3.3 ", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !10} !5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def] -!6 = metadata !{i32 786473, metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch", null} ; [ DW_TAG_file_type ] +!6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int] -!8 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] -!9 = metadata !{i32 786451, null, metadata !"S", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ] +!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [from ] !10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def] !11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ] !12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !13 = metadata !{null, metadata !14, metadata !8} !14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S] +!15 = metadata !{metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch"} diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll new file mode 100644 index 0000000000..8d59b52302 --- /dev/null +++ b/test/DebugInfo/namespace.ll @@ -0,0 +1,42 @@ +; RUN: llc -O0 -filetype=obj < %s > %t +; RUN: llvm-dwarfdump %t | FileCheck %s +; CHECK: debug_info contents +; CHECK: DW_TAG_namespace +; CHECK-NEXT: DW_AT_name{{.*}} = "A" +; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1:[0-9]]]) +; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x03) +; CHECK-NOT: NULL +; CHECK: DW_TAG_namespace +; CHECK-NEXT: DW_AT_name{{.*}} = "B" +; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2:[0-9]]]) +; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x01) +; CHECK-NOT: NULL +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name{{.*}}= "i" +; CHECK: file_names[ [[F1]]]{{.*}}debug-info-namespace.cpp +; CHECK: file_names[ [[F2]]]{{.*}}foo.cpp + +; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths +; changed to protect the guilty. The C++ source code is simply: +; namespace A { +; #line 1 "foo.cpp" +; namespace B { +; int i; +; } +; } + +@_ZN1A1B1iE = global i32 0, align 4 + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !4, metadata !""} ; [ DW_TAG_compile_unit ] [/home/foo/debug-info-namespace.cpp] [DW_LANG_C_plus_plus] +!1 = metadata !{i32 786473, metadata !2} ; [ DW_TAG_file_type ] [/home/foo/debug-info-namespace.cpp] +!2 = metadata !{metadata !"debug-info-namespace.cpp", metadata !"/home/foo"} +!3 = metadata !{i32 0} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786484, i32 0, metadata !6, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !7, i32 2, metadata !10, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def] +!6 = metadata !{i32 786489, metadata !8, metadata !9, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1] +!7 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ] [/home/foo/foo.cpp] +!8 = metadata !{metadata !"foo.cpp", metadata !"/home/foo"} +!9 = metadata !{i32 786489, metadata !2, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3] +!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll index fe50920cf5..58671d59f7 100644 --- a/test/DebugInfo/two-cus-from-same-file.ll +++ b/test/DebugInfo/two-cus-from-same-file.ll @@ -6,6 +6,7 @@ ; RUN: llc %s -o %t -filetype=obj -O0 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s +; XFAIL: hexagon ; ModuleID = 'test.bc' @str = private unnamed_addr constant [4 x i8] c"FOO\00" @@ -32,16 +33,16 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !llvm.dbg.cu = !{!0, !9} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang version 3.2 (trunk 156513)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] +!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !1 = metadata !{i32 0} !3 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] +!6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !8 = metadata !{null} -!9 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang version 3.2 (trunk 156513)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1} ; [ DW_TAG_compile_unit ] +!9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] !10 = metadata !{metadata !12} -!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ] +!12 = metadata !{i32 786478, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ] !13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !14 = metadata !{metadata !15, metadata !15, metadata !16} !15 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -61,6 +62,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone !29 = metadata !{i32 786443, metadata !12, i32 11, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] !30 = metadata !{i32 13, i32 3, metadata !29, null} !31 = metadata !{i32 14, i32 3, metadata !29, null} +!32 = metadata !{metadata !"foo.c", metadata !"/tmp"} ; This test is simple to be cross platform (many targets don't yet have ; sufficiently good DWARF emission and/or dumping) diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg index dd6a5bb240..1f8ae69b98 100644 --- a/test/ExecutionEngine/lit.local.cfg +++ b/test/ExecutionEngine/lit.local.cfg @@ -10,3 +10,5 @@ root = getRoot(config) if root.host_arch in ['PowerPC', 'AArch64']: config.unsupported = True +if 'hexagon' in root.target_triple: + config.unsupported = True diff --git a/test/ExecutionEngine/test-interp-vec-loadstore.ll b/test/ExecutionEngine/test-interp-vec-loadstore.ll new file mode 100644 index 0000000000..e9f5b445a8 --- /dev/null +++ b/test/ExecutionEngine/test-interp-vec-loadstore.ll @@ -0,0 +1,84 @@ +; RUN: %lli -force-interpreter=true %s | FileCheck %s +; CHECK: 1 +; CHECK: 2 +; CHECK: 3 +; CHECK: 4 +; CHECK: 5.{{[0]+}}e+{{[0]+}} +; CHECK: 6.{{[0]+}}e+{{[0]+}} +; CHECK: 7.{{[0]+}}e+{{[0]+}} +; CHECK: 8.{{[0]+}}e+{{[0]+}} +; CHECK: 9.{{[0]+}}e+{{[0]+}} +; CHECK: 1.{{[0]+}}e+{{[0]+}}1 +; CHECK: 1.1{{[0]+}}e+{{[0]+}}1 +; CHECK: 1.2{{[0]+}}e+{{[0]+}}1 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" + +@format_i32 = internal global [4 x i8] c"%d\0A\00" +@format_float = internal global [4 x i8] c"%e\0A\00" + +declare i32 @printf(i8*, ...) + +define i32 @main() { + %a = alloca <4 x i32>, align 16 + %b = alloca <4 x double>, align 16 + %c = alloca <4 x float>, align 16 + + store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %a, align 16 + + %val0 = load <4 x i32> *%a, align 16 + + %res_i32_0 = extractelement <4 x i32> %val0, i32 0 + %res_i32_1 = extractelement <4 x i32> %val0, i32 1 + %res_i32_2 = extractelement <4 x i32> %val0, i32 2 + %res_i32_3 = extractelement <4 x i32> %val0, i32 3 + + %ptr0 = getelementptr [4 x i8]* @format_i32, i32 0, i32 0 + call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_0) + call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_1) + call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_2) + call i32 (i8*,...)* @printf(i8* %ptr0, i32 %res_i32_3) + + store <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, <4 x double>* %b, align 16 + + %val1 = load <4 x double> *%b, align 16 + + %res_double_0 = extractelement <4 x double> %val1, i32 0 + %res_double_1 = extractelement <4 x double> %val1, i32 1 + %res_double_2 = extractelement <4 x double> %val1, i32 2 + %res_double_3 = extractelement <4 x double> %val1, i32 3 + + %ptr1 = getelementptr [4 x i8]* @format_float, i32 0, i32 0 + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_0) + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_1) + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_2) + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_double_3) + + + store <4 x float> <float 9.0, float 10.0, float 11.0, float 12.0>, <4 x float>* %c, align 16 + + %val2 = load <4 x float> *%c, align 16 + + %ptr2 = getelementptr [4 x i8]* @format_float, i32 0, i32 0 + + ; by some reason printf doesn't print float correctly, so + ; floats are casted to doubles and are printed as doubles + + %res_serv_0 = extractelement <4 x float> %val2, i32 0 + %res_float_0 = fpext float %res_serv_0 to double + %res_serv_1 = extractelement <4 x float> %val2, i32 1 + %res_float_1 = fpext float %res_serv_1 to double + %res_serv_2 = extractelement <4 x float> %val2, i32 2 + %res_float_2 = fpext float %res_serv_2 to double + %res_serv_3 = extractelement <4 x float> %val2, i32 3 + %res_float_3 = fpext float %res_serv_3 to double + + + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_0) + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_1) + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_2) + call i32 (i8*,...)* @printf(i8* %ptr1, double %res_float_3) + + + ret i32 0 +} diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll index c477b19904..fb32e704af 100644 --- a/test/Instrumentation/AddressSanitizer/basic.ll +++ b/test/Instrumentation/AddressSanitizer/basic.ll @@ -128,3 +128,15 @@ define void @i80test(i80* %a, i80* %b) nounwind uwtable sanitize_address { ; CHECK: __asan_report_store_n{{.*}}, i64 10) ; CHECK: __asan_report_store_n{{.*}}, i64 10) ; CHECK: ret void + +; asan should not instrument functions with available_externally linkage. +define available_externally i32 @f_available_externally(i32* %a) sanitize_address { +entry: + %tmp1 = load i32* %a + ret i32 %tmp1 +} +; CHECK: @f_available_externally +; CHECK-NOT: __asan_report +; CHECK: ret i32 + + diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll index 7822fd06ab..ec51caeb58 100644 --- a/test/Instrumentation/AddressSanitizer/debug_info.ll +++ b/test/Instrumentation/AddressSanitizer/debug_info.ll @@ -37,8 +37,8 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !2 = metadata !{i32 0} !3 = metadata !{metadata !4} !4 = metadata !{metadata !5} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz] -!6 = metadata !{i32 786473, metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz] +!6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] !8 = metadata !{metadata !9, metadata !9} !9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] @@ -58,3 +58,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !13 = metadata !{i32 786443, metadata !5, i32 1, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] !14 = metadata !{i32 2, i32 0, metadata !13, null} !15 = metadata !{i32 3, i32 0, metadata !13, null} +!16 = metadata !{metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo"} diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll index 2efd6b1e0d..0928c49415 100644 --- a/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll +++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll @@ -14,6 +14,7 @@ entry: declare void @_Z3fooPi(i32*) ; We create one global string constant for the stack frame above. +; It should have unnamed_addr and align 1. ; Make sure we don't create any other global constants. -; CHECK: = private constant -; CHECK-NOT: = private constant +; CHECK: = private unnamed_addr constant{{.*}}align 1 +; CHECK-NOT: = private unnamed_addr constant diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll index 584db3762b..1d00cfacaf 100644 --- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll +++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -asan -asan-module -asan-initialization-order -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @xxx = internal global i32 0, align 4 ; With dynamic initializer. diff --git a/test/Instrumentation/MemorySanitizer/unreachable.ll b/test/Instrumentation/MemorySanitizer/unreachable.ll index 66a9575d3f..c8130717c7 100644 --- a/test/Instrumentation/MemorySanitizer/unreachable.ll +++ b/test/Instrumentation/MemorySanitizer/unreachable.ll @@ -21,3 +21,19 @@ exit: ; CHECK: @Func ; CHECK: store i32 0, {{.*}} @__msan_retval_tls ; CHECK: ret i32 42 + + +define i32 @UnreachableLoop() nounwind uwtable { +entry: + ret i32 0 + +zzz: + br label %xxx + +xxx: + br label %zzz +} + +; CHECK: @UnreachableLoop +; CHECK: store i32 0, {{.*}} @__msan_retval_tls +; CHECK: ret i32 0 diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll index a08453ac4a..7b6b94edf1 100644 --- a/test/Instrumentation/ThreadSanitizer/read_from_global.ll +++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll @@ -48,7 +48,7 @@ entry: } ; CHECK: define void @call_virtual_func -; CHECK: __tsan_read +; CHECK: __tsan_vptr_read ; CHECK: = load ; CHECK-NOT: __tsan_read ; CHECK: = load diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll index 33c703b4c9..0ecff40493 100644 --- a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll +++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll @@ -20,3 +20,36 @@ entry: ; CHECK: ret i32 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + + +; Check that tsan converts mem intrinsics back to function calls. + +define void @MemCpyTest(i8* nocapture %x, i8* nocapture %y) { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false) + ret void +; CHECK: define void @MemCpyTest +; CHECK: call i8* @memcpy +; CHECK: ret void +} + +define void @MemMoveTest(i8* nocapture %x, i8* nocapture %y) { +entry: + tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false) + ret void +; CHECK: define void @MemMoveTest +; CHECK: call i8* @memmove +; CHECK: ret void +} + +define void @MemSetTest(i8* nocapture %x) { +entry: + tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false) + ret void +; CHECK define void @MemSetTest +; CHECK: call i8* @memset +; CHECK: ret void +} diff --git a/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/test/Instrumentation/ThreadSanitizer/vptr_read.ll new file mode 100644 index 0000000000..404ca3ffe5 --- /dev/null +++ b/test/Instrumentation/ThreadSanitizer/vptr_read.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -tsan -S | FileCheck %s +; Check that vptr reads are treated in a special way. +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define i8 @Foo(i8* %a) nounwind uwtable { +entry: +; CHECK: call void @__tsan_vptr_read + %0 = load i8* %a, align 8, !tbaa !0 + ret i8 %0 +} +!0 = metadata !{metadata !"vtable pointer", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} + diff --git a/test/MC/AArch64/gicv3-regs-diagnostics.s b/test/MC/AArch64/gicv3-regs-diagnostics.s new file mode 100644 index 0000000000..e891adbbb3 --- /dev/null +++ b/test/MC/AArch64/gicv3-regs-diagnostics.s @@ -0,0 +1,61 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s 2>&1 | FileCheck %s + + // Write-only + mrs x10, icc_eoir1_el1 + mrs x7, icc_eoir0_el1 + mrs x22, icc_dir_el1 + mrs x24, icc_sgi1r_el1 + mrs x8, icc_asgi1r_el1 + mrs x28, icc_sgi0r_el1 +// CHECK: error: expected readable system register +// CHECK-NEXT: mrs x10, icc_eoir1_el1 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected readable system register +// CHECK-NEXT: mrs x7, icc_eoir0_el1 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected readable system register +// CHECK-NEXT: mrs x22, icc_dir_el1 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected readable system register +// CHECK-NEXT: mrs x24, icc_sgi1r_el1 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected readable system register +// CHECK-NEXT: mrs x8, icc_asgi1r_el1 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected readable system register +// CHECK-NEXT: mrs x28, icc_sgi0r_el1 +// CHECK-NEXT: ^ + + // Read-only + msr icc_iar1_el1, x16 + msr icc_iar0_el1, x19 + msr icc_hppir1_el1, x29 + msr icc_hppir0_el1, x14 + msr icc_rpr_el1, x6 + msr ich_vtr_el2, x8 + msr ich_eisr_el2, x22 + msr ich_elsr_el2, x8 +// CHECK: error: expected writable system register or pstate +// CHECK-NEXT: msr icc_iar1_el1, x16 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr icc_iar0_el1, x19 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr icc_hppir1_el1, x29 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr icc_hppir0_el1, x14 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr icc_rpr_el1, x6 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr ich_vtr_el2, x8 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr ich_eisr_el2, x22 +// CHECK-NEXT: ^ +// CHECK-NEXT: error: expected writable system register or pstate +// CHECK-NEXT: msr ich_elsr_el2, x8 +// CHECK-NEXT: ^ diff --git a/test/MC/AArch64/gicv3-regs.s b/test/MC/AArch64/gicv3-regs.s new file mode 100644 index 0000000000..f7776514da --- /dev/null +++ b/test/MC/AArch64/gicv3-regs.s @@ -0,0 +1,223 @@ + // RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s + + mrs x8, icc_iar1_el1 + mrs x26, icc_iar0_el1 + mrs x2, icc_hppir1_el1 + mrs x17, icc_hppir0_el1 + mrs x29, icc_rpr_el1 + mrs x4, ich_vtr_el2 + mrs x24, ich_eisr_el2 + mrs x9, ich_elsr_el2 + mrs x24, icc_bpr1_el1 + mrs x14, icc_bpr0_el1 + mrs x19, icc_pmr_el1 + mrs x23, icc_ctlr_el1 + mrs x20, icc_ctlr_el3 + mrs x28, icc_sre_el1 + mrs x25, icc_sre_el2 + mrs x8, icc_sre_el3 + mrs x22, icc_igrpen0_el1 + mrs x5, icc_igrpen1_el1 + mrs x7, icc_igrpen1_el3 + mrs x22, icc_seien_el1 + mrs x4, icc_ap0r0_el1 + mrs x11, icc_ap0r1_el1 + mrs x27, icc_ap0r2_el1 + mrs x21, icc_ap0r3_el1 + mrs x2, icc_ap1r0_el1 + mrs x21, icc_ap1r1_el1 + mrs x10, icc_ap1r2_el1 + mrs x27, icc_ap1r3_el1 + mrs x20, ich_ap0r0_el2 + mrs x21, ich_ap0r1_el2 + mrs x5, ich_ap0r2_el2 + mrs x4, ich_ap0r3_el2 + mrs x15, ich_ap1r0_el2 + mrs x12, ich_ap1r1_el2 + mrs x27, ich_ap1r2_el2 + mrs x20, ich_ap1r3_el2 + mrs x10, ich_hcr_el2 + mrs x27, ich_misr_el2 + mrs x6, ich_vmcr_el2 + mrs x19, ich_vseir_el2 + mrs x3, ich_lr0_el2 + mrs x1, ich_lr1_el2 + mrs x22, ich_lr2_el2 + mrs x21, ich_lr3_el2 + mrs x6, ich_lr4_el2 + mrs x10, ich_lr5_el2 + mrs x11, ich_lr6_el2 + mrs x12, ich_lr7_el2 + mrs x0, ich_lr8_el2 + mrs x21, ich_lr9_el2 + mrs x13, ich_lr10_el2 + mrs x26, ich_lr11_el2 + mrs x1, ich_lr12_el2 + mrs x8, ich_lr13_el2 + mrs x2, ich_lr14_el2 + mrs x8, ich_lr15_el2 +// CHECK: mrs x8, icc_iar1_el1 // encoding: [0x08,0xcc,0x38,0xd5] +// CHECK: mrs x26, icc_iar0_el1 // encoding: [0x1a,0xc8,0x38,0xd5] +// CHECK: mrs x2, icc_hppir1_el1 // encoding: [0x42,0xcc,0x38,0xd5] +// CHECK: mrs x17, icc_hppir0_el1 // encoding: [0x51,0xc8,0x38,0xd5] +// CHECK: mrs x29, icc_rpr_el1 // encoding: [0x7d,0xcb,0x38,0xd5] +// CHECK: mrs x4, ich_vtr_el2 // encoding: [0x24,0xcb,0x3c,0xd5] +// CHECK: mrs x24, ich_eisr_el2 // encoding: [0x78,0xcb,0x3c,0xd5] +// CHECK: mrs x9, ich_elsr_el2 // encoding: [0xa9,0xcb,0x3c,0xd5] +// CHECK: mrs x24, icc_bpr1_el1 // encoding: [0x78,0xcc,0x38,0xd5] +// CHECK: mrs x14, icc_bpr0_el1 // encoding: [0x6e,0xc8,0x38,0xd5] +// CHECK: mrs x19, icc_pmr_el1 // encoding: [0x13,0x46,0x38,0xd5] +// CHECK: mrs x23, icc_ctlr_el1 // encoding: [0x97,0xcc,0x38,0xd5] +// CHECK: mrs x20, icc_ctlr_el3 // encoding: [0x94,0xcc,0x3e,0xd5] +// CHECK: mrs x28, icc_sre_el1 // encoding: [0xbc,0xcc,0x38,0xd5] +// CHECK: mrs x25, icc_sre_el2 // encoding: [0xb9,0xc9,0x3c,0xd5] +// CHECK: mrs x8, icc_sre_el3 // encoding: [0xa8,0xcc,0x3e,0xd5] +// CHECK: mrs x22, icc_igrpen0_el1 // encoding: [0xd6,0xcc,0x38,0xd5] +// CHECK: mrs x5, icc_igrpen1_el1 // encoding: [0xe5,0xcc,0x38,0xd5] +// CHECK: mrs x7, icc_igrpen1_el3 // encoding: [0xe7,0xcc,0x3e,0xd5] +// CHECK: mrs x22, icc_seien_el1 // encoding: [0x16,0xcd,0x38,0xd5] +// CHECK: mrs x4, icc_ap0r0_el1 // encoding: [0x84,0xc8,0x38,0xd5] +// CHECK: mrs x11, icc_ap0r1_el1 // encoding: [0xab,0xc8,0x38,0xd5] +// CHECK: mrs x27, icc_ap0r2_el1 // encoding: [0xdb,0xc8,0x38,0xd5] +// CHECK: mrs x21, icc_ap0r3_el1 // encoding: [0xf5,0xc8,0x38,0xd5] +// CHECK: mrs x2, icc_ap1r0_el1 // encoding: [0x02,0xc9,0x38,0xd5] +// CHECK: mrs x21, icc_ap1r1_el1 // encoding: [0x35,0xc9,0x38,0xd5] +// CHECK: mrs x10, icc_ap1r2_el1 // encoding: [0x4a,0xc9,0x38,0xd5] +// CHECK: mrs x27, icc_ap1r3_el1 // encoding: [0x7b,0xc9,0x38,0xd5] +// CHECK: mrs x20, ich_ap0r0_el2 // encoding: [0x14,0xc8,0x3c,0xd5] +// CHECK: mrs x21, ich_ap0r1_el2 // encoding: [0x35,0xc8,0x3c,0xd5] +// CHECK: mrs x5, ich_ap0r2_el2 // encoding: [0x45,0xc8,0x3c,0xd5] +// CHECK: mrs x4, ich_ap0r3_el2 // encoding: [0x64,0xc8,0x3c,0xd5] +// CHECK: mrs x15, ich_ap1r0_el2 // encoding: [0x0f,0xc9,0x3c,0xd5] +// CHECK: mrs x12, ich_ap1r1_el2 // encoding: [0x2c,0xc9,0x3c,0xd5] +// CHECK: mrs x27, ich_ap1r2_el2 // encoding: [0x5b,0xc9,0x3c,0xd5] +// CHECK: mrs x20, ich_ap1r3_el2 // encoding: [0x74,0xc9,0x3c,0xd5] +// CHECK: mrs x10, ich_hcr_el2 // encoding: [0x0a,0xcb,0x3c,0xd5] +// CHECK: mrs x27, ich_misr_el2 // encoding: [0x5b,0xcb,0x3c,0xd5] +// CHECK: mrs x6, ich_vmcr_el2 // encoding: [0xe6,0xcb,0x3c,0xd5] +// CHECK: mrs x19, ich_vseir_el2 // encoding: [0x93,0xc9,0x3c,0xd5] +// CHECK: mrs x3, ich_lr0_el2 // encoding: [0x03,0xcc,0x3c,0xd5] +// CHECK: mrs x1, ich_lr1_el2 // encoding: [0x21,0xcc,0x3c,0xd5] +// CHECK: mrs x22, ich_lr2_el2 // encoding: [0x56,0xcc,0x3c,0xd5] +// CHECK: mrs x21, ich_lr3_el2 // encoding: [0x75,0xcc,0x3c,0xd5] +// CHECK: mrs x6, ich_lr4_el2 // encoding: [0x86,0xcc,0x3c,0xd5] +// CHECK: mrs x10, ich_lr5_el2 // encoding: [0xaa,0xcc,0x3c,0xd5] +// CHECK: mrs x11, ich_lr6_el2 // encoding: [0xcb,0xcc,0x3c,0xd5] +// CHECK: mrs x12, ich_lr7_el2 // encoding: [0xec,0xcc,0x3c,0xd5] +// CHECK: mrs x0, ich_lr8_el2 // encoding: [0x00,0xcd,0x3c,0xd5] +// CHECK: mrs x21, ich_lr9_el2 // encoding: [0x35,0xcd,0x3c,0xd5] +// CHECK: mrs x13, ich_lr10_el2 // encoding: [0x4d,0xcd,0x3c,0xd5] +// CHECK: mrs x26, ich_lr11_el2 // encoding: [0x7a,0xcd,0x3c,0xd5] +// CHECK: mrs x1, ich_lr12_el2 // encoding: [0x81,0xcd,0x3c,0xd5] +// CHECK: mrs x8, ich_lr13_el2 // encoding: [0xa8,0xcd,0x3c,0xd5] +// CHECK: mrs x2, ich_lr14_el2 // encoding: [0xc2,0xcd,0x3c,0xd5] +// CHECK: mrs x8, ich_lr15_el2 // encoding: [0xe8,0xcd,0x3c,0xd5] + + msr icc_eoir1_el1, x27 + msr icc_eoir0_el1, x5 + msr icc_dir_el1, x13 + msr icc_sgi1r_el1, x21 + msr icc_asgi1r_el1, x25 + msr icc_sgi0r_el1, x28 + msr icc_bpr1_el1, x7 + msr icc_bpr0_el1, x9 + msr icc_pmr_el1, x29 + msr icc_ctlr_el1, x24 + msr icc_ctlr_el3, x0 + msr icc_sre_el1, x2 + msr icc_sre_el2, x5 + msr icc_sre_el3, x10 + msr icc_igrpen0_el1, x22 + msr icc_igrpen1_el1, x11 + msr icc_igrpen1_el3, x8 + msr icc_seien_el1, x4 + msr icc_ap0r0_el1, x27 + msr icc_ap0r1_el1, x5 + msr icc_ap0r2_el1, x20 + msr icc_ap0r3_el1, x0 + msr icc_ap1r0_el1, x2 + msr icc_ap1r1_el1, x29 + msr icc_ap1r2_el1, x23 + msr icc_ap1r3_el1, x11 + msr ich_ap0r0_el2, x2 + msr ich_ap0r1_el2, x27 + msr ich_ap0r2_el2, x7 + msr ich_ap0r3_el2, x1 + msr ich_ap1r0_el2, x7 + msr ich_ap1r1_el2, x12 + msr ich_ap1r2_el2, x14 + msr ich_ap1r3_el2, x13 + msr ich_hcr_el2, x1 + msr ich_misr_el2, x10 + msr ich_vmcr_el2, x24 + msr ich_vseir_el2, x29 + msr ich_lr0_el2, x26 + msr ich_lr1_el2, x9 + msr ich_lr2_el2, x18 + msr ich_lr3_el2, x26 + msr ich_lr4_el2, x22 + msr ich_lr5_el2, x26 + msr ich_lr6_el2, x27 + msr ich_lr7_el2, x8 + msr ich_lr8_el2, x17 + msr ich_lr9_el2, x19 + msr ich_lr10_el2, x17 + msr ich_lr11_el2, x5 + msr ich_lr12_el2, x29 + msr ich_lr13_el2, x2 + msr ich_lr14_el2, x13 + msr ich_lr15_el2, x27 +// CHECK: msr icc_eoir1_el1, x27 // encoding: [0x3b,0xcc,0x18,0xd5] +// CHECK: msr icc_eoir0_el1, x5 // encoding: [0x25,0xc8,0x18,0xd5] +// CHECK: msr icc_dir_el1, x13 // encoding: [0x2d,0xcb,0x18,0xd5] +// CHECK: msr icc_sgi1r_el1, x21 // encoding: [0xb5,0xcb,0x18,0xd5] +// CHECK: msr icc_asgi1r_el1, x25 // encoding: [0xd9,0xcb,0x18,0xd5] +// CHECK: msr icc_sgi0r_el1, x28 // encoding: [0xfc,0xcb,0x18,0xd5] +// CHECK: msr icc_bpr1_el1, x7 // encoding: [0x67,0xcc,0x18,0xd5] +// CHECK: msr icc_bpr0_el1, x9 // encoding: [0x69,0xc8,0x18,0xd5] +// CHECK: msr icc_pmr_el1, x29 // encoding: [0x1d,0x46,0x18,0xd5] +// CHECK: msr icc_ctlr_el1, x24 // encoding: [0x98,0xcc,0x18,0xd5] +// CHECK: msr icc_ctlr_el3, x0 // encoding: [0x80,0xcc,0x1e,0xd5] +// CHECK: msr icc_sre_el1, x2 // encoding: [0xa2,0xcc,0x18,0xd5] +// CHECK: msr icc_sre_el2, x5 // encoding: [0xa5,0xc9,0x1c,0xd5] +// CHECK: msr icc_sre_el3, x10 // encoding: [0xaa,0xcc,0x1e,0xd5] +// CHECK: msr icc_igrpen0_el1, x22 // encoding: [0xd6,0xcc,0x18,0xd5] +// CHECK: msr icc_igrpen1_el1, x11 // encoding: [0xeb,0xcc,0x18,0xd5] +// CHECK: msr icc_igrpen1_el3, x8 // encoding: [0xe8,0xcc,0x1e,0xd5] +// CHECK: msr icc_seien_el1, x4 // encoding: [0x04,0xcd,0x18,0xd5] +// CHECK: msr icc_ap0r0_el1, x27 // encoding: [0x9b,0xc8,0x18,0xd5] +// CHECK: msr icc_ap0r1_el1, x5 // encoding: [0xa5,0xc8,0x18,0xd5] +// CHECK: msr icc_ap0r2_el1, x20 // encoding: [0xd4,0xc8,0x18,0xd5] +// CHECK: msr icc_ap0r3_el1, x0 // encoding: [0xe0,0xc8,0x18,0xd5] +// CHECK: msr icc_ap1r0_el1, x2 // encoding: [0x02,0xc9,0x18,0xd5] +// CHECK: msr icc_ap1r1_el1, x29 // encoding: [0x3d,0xc9,0x18,0xd5] +// CHECK: msr icc_ap1r2_el1, x23 // encoding: [0x57,0xc9,0x18,0xd5] +// CHECK: msr icc_ap1r3_el1, x11 // encoding: [0x6b,0xc9,0x18,0xd5] +// CHECK: msr ich_ap0r0_el2, x2 // encoding: [0x02,0xc8,0x1c,0xd5] +// CHECK: msr ich_ap0r1_el2, x27 // encoding: [0x3b,0xc8,0x1c,0xd5] +// CHECK: msr ich_ap0r2_el2, x7 // encoding: [0x47,0xc8,0x1c,0xd5] +// CHECK: msr ich_ap0r3_el2, x1 // encoding: [0x61,0xc8,0x1c,0xd5] +// CHECK: msr ich_ap1r0_el2, x7 // encoding: [0x07,0xc9,0x1c,0xd5] +// CHECK: msr ich_ap1r1_el2, x12 // encoding: [0x2c,0xc9,0x1c,0xd5] +// CHECK: msr ich_ap1r2_el2, x14 // encoding: [0x4e,0xc9,0x1c,0xd5] +// CHECK: msr ich_ap1r3_el2, x13 // encoding: [0x6d,0xc9,0x1c,0xd5] +// CHECK: msr ich_hcr_el2, x1 // encoding: [0x01,0xcb,0x1c,0xd5] +// CHECK: msr ich_misr_el2, x10 // encoding: [0x4a,0xcb,0x1c,0xd5] +// CHECK: msr ich_vmcr_el2, x24 // encoding: [0xf8,0xcb,0x1c,0xd5] +// CHECK: msr ich_vseir_el2, x29 // encoding: [0x9d,0xc9,0x1c,0xd5] +// CHECK: msr ich_lr0_el2, x26 // encoding: [0x1a,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr1_el2, x9 // encoding: [0x29,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr2_el2, x18 // encoding: [0x52,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr3_el2, x26 // encoding: [0x7a,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr4_el2, x22 // encoding: [0x96,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr5_el2, x26 // encoding: [0xba,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr6_el2, x27 // encoding: [0xdb,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr7_el2, x8 // encoding: [0xe8,0xcc,0x1c,0xd5] +// CHECK: msr ich_lr8_el2, x17 // encoding: [0x11,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr9_el2, x19 // encoding: [0x33,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr10_el2, x17 // encoding: [0x51,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr11_el2, x5 // encoding: [0x65,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr12_el2, x29 // encoding: [0x9d,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr13_el2, x2 // encoding: [0xa2,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr14_el2, x13 // encoding: [0xcd,0xcd,0x1c,0xd5] +// CHECK: msr ich_lr15_el2, x27 // encoding: [0xfb,0xcd,0x1c,0xd5] diff --git a/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s b/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s new file mode 100644 index 0000000000..172abcf6f8 --- /dev/null +++ b/test/MC/ARM/2013-03-18-Br-to-label-named-like-reg.s @@ -0,0 +1,5 @@ +@ RUN: llvm-mc -arch arm %s +@ CHECK: test: +@ CHECK: br r1 +test: + bl r1 diff --git a/test/MC/Disassembler/AArch64/gicv3-regs.txt b/test/MC/Disassembler/AArch64/gicv3-regs.txt new file mode 100644 index 0000000000..4351f6460c --- /dev/null +++ b/test/MC/Disassembler/AArch64/gicv3-regs.txt @@ -0,0 +1,222 @@ +# RUN: llvm-mc -triple aarch64-none-linux-gnu -disassemble < %s | FileCheck %s + +0x8 0xcc 0x38 0xd5 +# CHECK: mrs x8, icc_iar1_el1 +0x1a 0xc8 0x38 0xd5 +# CHECK: mrs x26, icc_iar0_el1 +0x42 0xcc 0x38 0xd5 +# CHECK: mrs x2, icc_hppir1_el1 +0x51 0xc8 0x38 0xd5 +# CHECK: mrs x17, icc_hppir0_el1 +0x7d 0xcb 0x38 0xd5 +# CHECK: mrs x29, icc_rpr_el1 +0x24 0xcb 0x3c 0xd5 +# CHECK: mrs x4, ich_vtr_el2 +0x78 0xcb 0x3c 0xd5 +# CHECK: mrs x24, ich_eisr_el2 +0xa9 0xcb 0x3c 0xd5 +# CHECK: mrs x9, ich_elsr_el2 +0x78 0xcc 0x38 0xd5 +# CHECK: mrs x24, icc_bpr1_el1 +0x6e 0xc8 0x38 0xd5 +# CHECK: mrs x14, icc_bpr0_el1 +0x13 0x46 0x38 0xd5 +# CHECK: mrs x19, icc_pmr_el1 +0x97 0xcc 0x38 0xd5 +# CHECK: mrs x23, icc_ctlr_el1 +0x94 0xcc 0x3e 0xd5 +# CHECK: mrs x20, icc_ctlr_el3 +0xbc 0xcc 0x38 0xd5 +# CHECK: mrs x28, icc_sre_el1 +0xb9 0xc9 0x3c 0xd5 +# CHECK: mrs x25, icc_sre_el2 +0xa8 0xcc 0x3e 0xd5 +# CHECK: mrs x8, icc_sre_el3 +0xd6 0xcc 0x38 0xd5 +# CHECK: mrs x22, icc_igrpen0_el1 +0xe5 0xcc 0x38 0xd5 +# CHECK: mrs x5, icc_igrpen1_el1 +0xe7 0xcc 0x3e 0xd5 +# CHECK: mrs x7, icc_igrpen1_el3 +0x16 0xcd 0x38 0xd5 +# CHECK: mrs x22, icc_seien_el1 +0x84 0xc8 0x38 0xd5 +# CHECK: mrs x4, icc_ap0r0_el1 +0xab 0xc8 0x38 0xd5 +# CHECK: mrs x11, icc_ap0r1_el1 +0xdb 0xc8 0x38 0xd5 +# CHECK: mrs x27, icc_ap0r2_el1 +0xf5 0xc8 0x38 0xd5 +# CHECK: mrs x21, icc_ap0r3_el1 +0x2 0xc9 0x38 0xd5 +# CHECK: mrs x2, icc_ap1r0_el1 +0x35 0xc9 0x38 0xd5 +# CHECK: mrs x21, icc_ap1r1_el1 +0x4a 0xc9 0x38 0xd5 +# CHECK: mrs x10, icc_ap1r2_el1 +0x7b 0xc9 0x38 0xd5 +# CHECK: mrs x27, icc_ap1r3_el1 +0x14 0xc8 0x3c 0xd5 +# CHECK: mrs x20, ich_ap0r0_el2 +0x35 0xc8 0x3c 0xd5 +# CHECK: mrs x21, ich_ap0r1_el2 +0x45 0xc8 0x3c 0xd5 +# CHECK: mrs x5, ich_ap0r2_el2 +0x64 0xc8 0x3c 0xd5 +# CHECK: mrs x4, ich_ap0r3_el2 +0xf 0xc9 0x3c 0xd5 +# CHECK: mrs x15, ich_ap1r0_el2 +0x2c 0xc9 0x3c 0xd5 +# CHECK: mrs x12, ich_ap1r1_el2 +0x5b 0xc9 0x3c 0xd5 +# CHECK: mrs x27, ich_ap1r2_el2 +0x74 0xc9 0x3c 0xd5 +# CHECK: mrs x20, ich_ap1r3_el2 +0xa 0xcb 0x3c 0xd5 +# CHECK: mrs x10, ich_hcr_el2 +0x5b 0xcb 0x3c 0xd5 +# CHECK: mrs x27, ich_misr_el2 +0xe6 0xcb 0x3c 0xd5 +# CHECK: mrs x6, ich_vmcr_el2 +0x93 0xc9 0x3c 0xd5 +# CHECK: mrs x19, ich_vseir_el2 +0x3 0xcc 0x3c 0xd5 +# CHECK: mrs x3, ich_lr0_el2 +0x21 0xcc 0x3c 0xd5 +# CHECK: mrs x1, ich_lr1_el2 +0x56 0xcc 0x3c 0xd5 +# CHECK: mrs x22, ich_lr2_el2 +0x75 0xcc 0x3c 0xd5 +# CHECK: mrs x21, ich_lr3_el2 +0x86 0xcc 0x3c 0xd5 +# CHECK: mrs x6, ich_lr4_el2 +0xaa 0xcc 0x3c 0xd5 +# CHECK: mrs x10, ich_lr5_el2 +0xcb 0xcc 0x3c 0xd5 +# CHECK: mrs x11, ich_lr6_el2 +0xec 0xcc 0x3c 0xd5 +# CHECK: mrs x12, ich_lr7_el2 +0x0 0xcd 0x3c 0xd5 +# CHECK: mrs x0, ich_lr8_el2 +0x35 0xcd 0x3c 0xd5 +# CHECK: mrs x21, ich_lr9_el2 +0x4d 0xcd 0x3c 0xd5 +# CHECK: mrs x13, ich_lr10_el2 +0x7a 0xcd 0x3c 0xd5 +# CHECK: mrs x26, ich_lr11_el2 +0x81 0xcd 0x3c 0xd5 +# CHECK: mrs x1, ich_lr12_el2 +0xa8 0xcd 0x3c 0xd5 +# CHECK: mrs x8, ich_lr13_el2 +0xc2 0xcd 0x3c 0xd5 +# CHECK: mrs x2, ich_lr14_el2 +0xe8 0xcd 0x3c 0xd5 +# CHECK: mrs x8, ich_lr15_el2 +0x3b 0xcc 0x18 0xd5 +# CHECK: msr icc_eoir1_el1, x27 +0x25 0xc8 0x18 0xd5 +# CHECK: msr icc_eoir0_el1, x5 +0x2d 0xcb 0x18 0xd5 +# CHECK: msr icc_dir_el1, x13 +0xb5 0xcb 0x18 0xd5 +# CHECK: msr icc_sgi1r_el1, x21 +0xd9 0xcb 0x18 0xd5 +# CHECK: msr icc_asgi1r_el1, x25 +0xfc 0xcb 0x18 0xd5 +# CHECK: msr icc_sgi0r_el1, x28 +0x67 0xcc 0x18 0xd5 +# CHECK: msr icc_bpr1_el1, x7 +0x69 0xc8 0x18 0xd5 +# CHECK: msr icc_bpr0_el1, x9 +0x1d 0x46 0x18 0xd5 +# CHECK: msr icc_pmr_el1, x29 +0x98 0xcc 0x18 0xd5 +# CHECK: msr icc_ctlr_el1, x24 +0x80 0xcc 0x1e 0xd5 +# CHECK: msr icc_ctlr_el3, x0 +0xa2 0xcc 0x18 0xd5 +# CHECK: msr icc_sre_el1, x2 +0xa5 0xc9 0x1c 0xd5 +# CHECK: msr icc_sre_el2, x5 +0xaa 0xcc 0x1e 0xd5 +# CHECK: msr icc_sre_el3, x10 +0xd6 0xcc 0x18 0xd5 +# CHECK: msr icc_igrpen0_el1, x22 +0xeb 0xcc 0x18 0xd5 +# CHECK: msr icc_igrpen1_el1, x11 +0xe8 0xcc 0x1e 0xd5 +# CHECK: msr icc_igrpen1_el3, x8 +0x4 0xcd 0x18 0xd5 +# CHECK: msr icc_seien_el1, x4 +0x9b 0xc8 0x18 0xd5 +# CHECK: msr icc_ap0r0_el1, x27 +0xa5 0xc8 0x18 0xd5 +# CHECK: msr icc_ap0r1_el1, x5 +0xd4 0xc8 0x18 0xd5 +# CHECK: msr icc_ap0r2_el1, x20 +0xe0 0xc8 0x18 0xd5 +# CHECK: msr icc_ap0r3_el1, x0 +0x2 0xc9 0x18 0xd5 +# CHECK: msr icc_ap1r0_el1, x2 +0x3d 0xc9 0x18 0xd5 +# CHECK: msr icc_ap1r1_el1, x29 +0x57 0xc9 0x18 0xd5 +# CHECK: msr icc_ap1r2_el1, x23 +0x6b 0xc9 0x18 0xd5 +# CHECK: msr icc_ap1r3_el1, x11 +0x2 0xc8 0x1c 0xd5 +# CHECK: msr ich_ap0r0_el2, x2 +0x3b 0xc8 0x1c 0xd5 +# CHECK: msr ich_ap0r1_el2, x27 +0x47 0xc8 0x1c 0xd5 +# CHECK: msr ich_ap0r2_el2, x7 +0x61 0xc8 0x1c 0xd5 +# CHECK: msr ich_ap0r3_el2, x1 +0x7 0xc9 0x1c 0xd5 +# CHECK: msr ich_ap1r0_el2, x7 +0x2c 0xc9 0x1c 0xd5 +# CHECK: msr ich_ap1r1_el2, x12 +0x4e 0xc9 0x1c 0xd5 +# CHECK: msr ich_ap1r2_el2, x14 +0x6d 0xc9 0x1c 0xd5 +# CHECK: msr ich_ap1r3_el2, x13 +0x1 0xcb 0x1c 0xd5 +# CHECK: msr ich_hcr_el2, x1 +0x4a 0xcb 0x1c 0xd5 +# CHECK: msr ich_misr_el2, x10 +0xf8 0xcb 0x1c 0xd5 +# CHECK: msr ich_vmcr_el2, x24 +0x9d 0xc9 0x1c 0xd5 +# CHECK: msr ich_vseir_el2, x29 +0x1a 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr0_el2, x26 +0x29 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr1_el2, x9 +0x52 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr2_el2, x18 +0x7a 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr3_el2, x26 +0x96 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr4_el2, x22 +0xba 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr5_el2, x26 +0xdb 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr6_el2, x27 +0xe8 0xcc 0x1c 0xd5 +# CHECK: msr ich_lr7_el2, x8 +0x11 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr8_el2, x17 +0x33 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr9_el2, x19 +0x51 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr10_el2, x17 +0x65 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr11_el2, x5 +0x9d 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr12_el2, x29 +0xa2 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr13_el2, x2 +0xcd 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr14_el2, x13 +0xfb 0xcd 0x1c 0xd5 +# CHECK: msr ich_lr15_el2, x27 diff --git a/test/MC/Disassembler/ARM/thumb2.txt b/test/MC/Disassembler/ARM/thumb2.txt index 45dace3b09..31f75b39fa 100644 --- a/test/MC/Disassembler/ARM/thumb2.txt +++ b/test/MC/Disassembler/ARM/thumb2.txt @@ -254,9 +254,12 @@ #------------------------------------------------------------------------------ # CHECK: cbnz r7, #6 # CHECK: cbnz r7, #12 +# CHECK: cbz r4, #64 0x1f 0xb9 0x37 0xb9 +0x04 0xb3 + #------------------------------------------------------------------------------ # CDP/CDP2 @@ -554,6 +557,7 @@ # CHECK: ldr.w r8, [r8, r2, lsl #2] # CHECK: ldr.w r7, [sp, r2, lsl #1] # CHECK: ldr.w r7, [sp, r2] +# CHECK: ldr pc, [sp], #12 # CHECK: ldr r2, [r4, #255]! # CHECK: ldr r8, [sp, #4]! # CHECK: ldr lr, [sp, #-4]! @@ -567,6 +571,7 @@ 0x58 0xf8 0x22 0x80 0x5d 0xf8 0x12 0x70 0x5d 0xf8 0x02 0x70 +0x5d 0xf8 0x0c 0xfb 0x54 0xf8 0xff 0x2f 0x5d 0xf8 0x04 0x8f 0x5d 0xf8 0x04 0xed diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt index 5ea40eb913..9827a1809f 100644 --- a/test/MC/Disassembler/X86/simple-tests.txt +++ b/test/MC/Disassembler/X86/simple-tests.txt @@ -753,3 +753,18 @@ # CHECK: lock # CHECK-NEXT: xaddq %rcx, %rbx 0xf0 0x48 0x0f 0xc1 0xcb + +# rdar://13493622 lldb doesn't print the x86 rep/repne prefix when disassembling +# CHECK: repne +# CHECK-NEXT: movsd +0xf2 0xa5 +# CHECK: repne +# CHECK-NEXT: movsq +0xf2 0x48 0xa5 +# CHECK: repne +# CHECK-NEXT: movb $0, (%rax) +0xf2 0xc6 0x0 0x0 +# CHECK: rep +# CHECK-NEXT: lock +# CHECK-NEXT: incl (%rax) +0xf3 0xf0 0xff 0x00 diff --git a/test/MC/Mips/ef_frame.ll b/test/MC/Mips/eh-frame.ll index 91c8b43d02..91c8b43d02 100644 --- a/test/MC/Mips/ef_frame.ll +++ b/test/MC/Mips/eh-frame.ll diff --git a/test/MC/Mips/eh-frame.s b/test/MC/Mips/eh-frame.s new file mode 100644 index 0000000000..56278b8b99 --- /dev/null +++ b/test/MC/Mips/eh-frame.s @@ -0,0 +1,46 @@ +// Assembler generated object test. +// This tests .eh_frame descriptors minimally. + +// What we really need is a prettyprinter output check not unlike what +// gnu's readobj generates instead of checking the bits for .eh_frame. + +// RUN: llvm-mc -filetype=obj -mcpu=mips32r2 -triple mipsel-unknown-linux -arch=mipsel %s -o - \ +// RUN: | llvm-objdump -s - | FileCheck -check-prefix=CHECK-LEO32 %s + +// RUN: llvm-mc -filetype=obj -mcpu=mips32r2 -triple mips-unknown-linux -arch=mips %s -o - \ +// RUN: | llvm-objdump -s - | FileCheck -check-prefix=CHECK-BEO32 %s + +// RUN: llvm-mc -filetype=obj -mcpu=mips64r2 -mattr=n64 -arch=mips64el %s -o - \ +// RUN: | llvm-objdump -s - | FileCheck -check-prefix=CHECK-LE64 %s + +// RUN: llvm-mc -filetype=obj -mcpu=mips64r2 -mattr=n64 -arch=mips64 %s -o - \ +// RUN: | llvm-objdump -s - | FileCheck -check-prefix=CHECK-BE64 %s + +// O32 little endian +// CHECK-LEO32: Contents of section .eh_frame: +// CHECK-LEO32-NEXT: 0000 10000000 00000000 017a5200 017c1f01 .........zR..|.. +// CHECK-LEO32-NEXT: 0010 000c1d00 10000000 18000000 00000000 ................ +// CHECK-LEO32-NEXT: 0020 00000000 00000000 ........ + +// O32 big endian +// CHECK-BEO32: Contents of section .eh_frame: +// CHECK-BEO32-NEXT 0000 00000010 00000000 017a5200 017c1f01 .........zR..|.. +// CHECK-BEO32-NEXT 0010 000c1d00 00000010 00000018 00000000 ................ +// CHECK-BEO32-NEXT 0020 00000000 00000000 ........ + +// N64 little endian +// CHECK-LE64: Contents of section .eh_frame: +// CHECK-LE64-NEXT: 0000 10000000 00000000 017a5200 01781f01 .........zR..x.. +// CHECK-LE64-NEXT: 0010 000c1d00 18000000 18000000 00000000 ................ +// CHECK-LE64-NEXT: 0020 00000000 00000000 00000000 00000000 ................ + +// N64 big endian +// CHECK-BE64: Contents of section .eh_frame: +// CHECK-BE64-NEXT: 0000 00000010 00000000 017a5200 01781f01 .........zR..x.. +// CHECK-BE64-NEXT: 0010 000c1d00 00000018 00000018 00000000 ................ +// CHECK-BE64-NEXT: 0020 00000000 00000000 00000000 00000000 ................ + +func: + .cfi_startproc + .cfi_endproc + diff --git a/test/MC/Mips/fde-reloc.s b/test/MC/Mips/fde-reloc.s new file mode 100644 index 0000000000..2db5d0b638 --- /dev/null +++ b/test/MC/Mips/fde-reloc.s @@ -0,0 +1,21 @@ +// This just tests that a relocation of the specified type shows up as the first +// relocation in the relocation section for .eh_frame when produced by the +// assembler. + +// RUN: llvm-mc -filetype=obj %s -o - -triple mips-unknown-unknown | \ +// RUN: llvm-objdump -r - | FileCheck --check-prefix=MIPS32 %s + +// RUN: llvm-mc -filetype=obj %s -o - -triple mips64-unknown-unknown | \ +// RUN: llvm-objdump -r - | FileCheck --check-prefix=MIPS64 %s + +// PR15448 + +func: + .cfi_startproc + .cfi_endproc + +// MIPS32: RELOCATION RECORDS FOR [.eh_frame]: +// MIPS32-NEXT: R_MIPS_32 + +// MIPS64: RELOCATION RECORDS FOR [.eh_frame]: +// MIPS64-NEXT: R_MIPS_64 diff --git a/test/MC/Mips/mips-alu-instructions.s b/test/MC/Mips/mips-alu-instructions.s index 816138ec65..7384d19e44 100644 --- a/test/MC/Mips/mips-alu-instructions.s +++ b/test/MC/Mips/mips-alu-instructions.s @@ -13,6 +13,7 @@ # CHECK: ins $19, $9, 6, 7 # encoding: [0x84,0x61,0x33,0x7d] # CHECK: nor $9, $6, $7 # encoding: [0x27,0x48,0xc7,0x00] # CHECK: or $3, $3, $5 # encoding: [0x25,0x18,0x65,0x00] +# CHECK: ori $4, $5, 17767 # encoding: [0x67,0x45,0xa4,0x34] # CHECK: ori $9, $6, 17767 # encoding: [0x67,0x45,0xc9,0x34] # CHECK: rotr $9, $6, 7 # encoding: [0xc2,0x49,0x26,0x00] # CHECK: rotrv $9, $6, $7 # encoding: [0x46,0x48,0xe6,0x00] @@ -40,6 +41,7 @@ ins $19, $9, 6,7 nor $9, $6, $7 or $3, $3, $5 + or $4, $5, 17767 ori $9, $6, 17767 rotr $9, $6, 7 rotrv $9, $6, $7 diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s index cfc15e883a..3385fe1930 100644 --- a/test/MC/Mips/mips-expansions.s +++ b/test/MC/Mips/mips-expansions.s @@ -16,6 +16,22 @@ # CHECK: lui $7, 1 # encoding: [0x01,0x00,0x07,0x3c] # CHECK: ori $7, $7, 2 # encoding: [0x02,0x00,0xe7,0x34] # CHECK: addu $7, $7, $8 # encoding: [0x21,0x38,0xe8,0x00] +# CHECK: lui $10, %hi(symbol) # encoding: [A,A,0x0a,0x3c] +# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16 +# CHECK: addu $10, $10, $4 # encoding: [0x21,0x50,0x44,0x01] +# CHECK: lw $10, %lo(symbol)($10) # encoding: [A,A,0x4a,0x8d] +# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16 +# CHECK: lui $1, %hi(symbol) # encoding: [A,A,0x01,0x3c] +# CHECK: # fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16 +# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00] +# CHECK: sw $10, %lo(symbol)($1) # encoding: [A,A,0x2a,0xac] +# CHECK: # fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16 +# CHECK: lui $10, 10 # encoding: [0x0a,0x00,0x0a,0x3c] +# CHECK: addu $10, $10, $4 # encoding: [0x21,0x50,0x44,0x01] +# CHECK: lw $10, 123($10) # encoding: [0x7b,0x00,0x4a,0x8d] +# CHECK: lui $1, 2 # encoding: [0x02,0x00,0x01,0x3c] +# CHECK: addu $1, $1, $9 # encoding: [0x21,0x08,0x29,0x00] +# CHECK: sw $10, 57920($1) # encoding: [0x40,0xe2,0x2a,0xac] li $5,123 li $6,-2345 @@ -25,3 +41,9 @@ la $7,65538 la $a0, 20($a1) la $7,65538($8) + + lw $t2, symbol($a0) + sw $t2, symbol($t1) + + lw $t2, 655483($a0) + sw $t2, 123456($t1) diff --git a/test/MC/Mips/mips-jump-instructions.s b/test/MC/Mips/mips-jump-instructions.s index bc2d720398..1dcb287738 100644 --- a/test/MC/Mips/mips-jump-instructions.s +++ b/test/MC/Mips/mips-jump-instructions.s @@ -1,30 +1,34 @@ -# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | FileCheck %s +# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32r2 | \ +# RUN: FileCheck %s # Check that the assembler can handle the documented syntax # for jumps and branches. # CHECK: .section __TEXT,__text,regular,pure_instructions #------------------------------------------------------------------------------ # Branch instructions #------------------------------------------------------------------------------ -# CHECK: b 1332 # encoding: [0x34,0x05,0x00,0x10] +# CHECK: b 1332 # encoding: [0x4d,0x01,0x00,0x10] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bc1f 1332 # encoding: [0x34,0x05,0x00,0x45] +# CHECK: bc1f 1332 # encoding: [0x4d,0x01,0x00,0x45] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bc1t 1332 # encoding: [0x34,0x05,0x01,0x45] +# CHECK: bc1t 1332 # encoding: [0x4d,0x01,0x01,0x45] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: beq $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x11] +# CHECK: beq $9, $6, 1332 # encoding: [0x4d,0x01,0x26,0x11] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bgez $6, 1332 # encoding: [0x34,0x05,0xc1,0x04] +# CHECK: bgez $6, 1332 # encoding: [0x4d,0x01,0xc1,0x04] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bgezal $6, 1332 # encoding: [0x34,0x05,0xd1,0x04] +# CHECK: bgezal $6, 1332 # encoding: [0x4d,0x01,0xd1,0x04] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bgtz $6, 1332 # encoding: [0x34,0x05,0xc0,0x1c] +# CHECK: bgtz $6, 1332 # encoding: [0x4d,0x01,0xc0,0x1c] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: blez $6, 1332 # encoding: [0x34,0x05,0xc0,0x18] +# CHECK: blez $6, 1332 # encoding: [0x4d,0x01,0xc0,0x18] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bne $9, $6, 1332 # encoding: [0x34,0x05,0x26,0x15] +# CHECK: bne $9, $6, 1332 # encoding: [0x4d,0x01,0x26,0x15] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: bal 1332 # encoding: [0x34,0x05,0x11,0x04] +# CHECK: bal 1332 # encoding: [0x4d,0x01,0x11,0x04] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] + +.set noreorder + b 1332 nop bc1f 1332 @@ -50,9 +54,9 @@ end_of_code: #------------------------------------------------------------------------------ # Jump instructions #------------------------------------------------------------------------------ -# CHECK: j 1328 # encoding: [0x30,0x05,0x00,0x08] +# CHECK: j 1328 # encoding: [0x4c,0x01,0x00,0x08] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] -# CHECK: jal 1328 # encoding: [0x30,0x05,0x00,0x0c] +# CHECK: jal 1328 # encoding: [0x4c,0x01,0x00,0x0c] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] # CHECK: jalr $6 # encoding: [0x09,0xf8,0xc0,0x00] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] @@ -63,6 +67,11 @@ end_of_code: # CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00] # CHECK: nop # encoding: [0x00,0x00,0x00,0x00] # CHECK: jr $7 # encoding: [0x08,0x00,0xe0,0x00] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jalr $25 # encoding: [0x09,0xf8,0x20,0x03] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] +# CHECK: jalr $4, $25 # encoding: [0x09,0x20,0x20,0x03] +# CHECK: nop # encoding: [0x00,0x00,0x00,0x00] j 1328 @@ -78,3 +87,8 @@ end_of_code: jr $7 nop j $7 + nop + jal $25 + nop + jal $4,$25 + nop diff --git a/test/MC/Mips/mips_directives.s b/test/MC/Mips/mips_directives.s index 65d584dfa8..df7e645633 100644 --- a/test/MC/Mips/mips_directives.s +++ b/test/MC/Mips/mips_directives.s @@ -17,3 +17,9 @@ $JTI0_0: .set macro .set reorder .set at=$a0 + .set STORE_MASK,$t7 + .set FPU_MASK,$f7 +#CHECK: abs.s $f6, $f7 # encoding: [0x46,0x00,0x39,0x85] +#CHECK: and $3, $15, $15 # encoding: [0x01,0xef,0x18,0x24] + abs.s $f6,FPU_MASK + and $3,$t7,STORE_MASK diff --git a/test/MC/X86/fde-reloc.s b/test/MC/X86/fde-reloc.s new file mode 100644 index 0000000000..63ac976621 --- /dev/null +++ b/test/MC/X86/fde-reloc.s @@ -0,0 +1,11 @@ +// RUN: llvm-mc -filetype=obj %s -o - -triple x86_64-pc-linux | llvm-objdump -r - | FileCheck --check-prefix=X86-64 %s +// RUN: llvm-mc -filetype=obj %s -o - -triple i686-pc-linux | llvm-objdump -r - | FileCheck --check-prefix=I686 %s + +// PR15448 + +func: + .cfi_startproc + .cfi_endproc + +// X86-64: R_X86_64_PC32 +// I686: R_386_PC32 diff --git a/test/MC/X86/intel-syntax-encoding.s b/test/MC/X86/intel-syntax-encoding.s index 03b0551164..9806ac3802 100644 --- a/test/MC/X86/intel-syntax-encoding.s +++ b/test/MC/X86/intel-syntax-encoding.s @@ -31,6 +31,27 @@ // CHECK: encoding: [0x48,0x83,0xc0,0xf4] add rax, -12 +// CHECK: encoding: [0x66,0x83,0xd0,0xf4] + adc ax, -12 +// CHECK: encoding: [0x83,0xd0,0xf4] + adc eax, -12 +// CHECK: encoding: [0x48,0x83,0xd0,0xf4] + adc rax, -12 + +// CHECK: encoding: [0x66,0x83,0xd8,0xf4] + sbb ax, -12 +// CHECK: encoding: [0x83,0xd8,0xf4] + sbb eax, -12 +// CHECK: encoding: [0x48,0x83,0xd8,0xf4] + sbb rax, -12 + +// CHECK: encoding: [0x66,0x83,0xf8,0xf4] + cmp ax, -12 +// CHECK: encoding: [0x83,0xf8,0xf4] + cmp eax, -12 +// CHECK: encoding: [0x48,0x83,0xf8,0xf4] + cmp rax, -12 + LBB0_3: // CHECK: encoding: [0xeb,A] jmp LBB0_3 diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s index 5524c706cc..d912915c58 100644 --- a/test/MC/X86/x86-32-ms-inline-asm.s +++ b/test/MC/X86/x86-32-ms-inline-asm.s @@ -57,6 +57,26 @@ _t21: ## @t21 // CHECK: movl 4(%esi,%eax,2), %eax // CHECK: # encoding: [0x8b,0x44,0x46,0x04] + mov eax, 4[esi + 2*eax + 4] +// CHECK: movl 8(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x08] + mov eax, 4[esi][2*eax + 4] +// CHECK: movl 8(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x08] + mov eax, 4[esi + 2*eax][4] +// CHECK: movl 8(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x08] + mov eax, 4[esi][2*eax][4] +// CHECK: movl 8(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x08] + mov eax, 4[esi][2*eax][4][8] +// CHECK: movl 16(%esi,%eax,2), %eax +// CHECK: # encoding: [0x8b,0x44,0x46,0x10] + + prefetchnta 64[eax] +// CHECK: prefetchnta 64(%eax) +// CHECK: # encoding: [0x0f,0x18,0x40,0x40] + pusha // CHECK: pushal // CHECK: # encoding: [0x60] diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s index f7ee351ab5..c9bd954e90 100644 --- a/test/MC/X86/x86_64-fma4-encoding.s +++ b/test/MC/X86/x86_64-fma4-encoding.s @@ -25,6 +25,10 @@ // CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10] vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 +// CHECK: vfmaddsd %xmm10, %xmm1, %xmm0, %xmm0 +// CHECK: encoding: [0xc4,0xc3,0xf9,0x6b,0xc2,0x10] + vfmaddsd %xmm10, %xmm1, %xmm0, %xmm0 + // CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0 // CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10] vfmaddps (%rcx), %xmm1, %xmm0, %xmm0 diff --git a/test/MC/X86/x86_64-rand-encoding.s b/test/MC/X86/x86_64-rand-encoding.s new file mode 100644 index 0000000000..3a8cb817bc --- /dev/null +++ b/test/MC/X86/x86_64-rand-encoding.s @@ -0,0 +1,49 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s + +// CHECK: rdrandw %ax +// CHECK: encoding: [0x66,0x0f,0xc7,0xf0] + rdrand %ax + +// CHECK: rdrandl %eax +// CHECK: encoding: [0x0f,0xc7,0xf0] + rdrand %eax + +// CHECK: rdrandq %rax +// CHECK: encoding: [0x48,0x0f,0xc7,0xf0] + rdrand %rax + +// CHECK: rdrandw %r11w +// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xf3] + rdrand %r11w + +// CHECK: rdrandl %r11d +// CHECK: encoding: [0x41,0x0f,0xc7,0xf3] + rdrand %r11d + +// CHECK: rdrandq %r11 +// CHECK: encoding: [0x49,0x0f,0xc7,0xf3] + rdrand %r11 + +// CHECK: rdseedw %ax +// CHECK: encoding: [0x66,0x0f,0xc7,0xf8] + rdseed %ax + +// CHECK: rdseedl %eax +// CHECK: encoding: [0x0f,0xc7,0xf8] + rdseed %eax + +// CHECK: rdseedq %rax +// CHECK: encoding: [0x48,0x0f,0xc7,0xf8] + rdseed %rax + +// CHECK: rdseedw %r11w +// CHECK: encoding: [0x66,0x41,0x0f,0xc7,0xfb] + rdseed %r11w + +// CHECK: rdseedl %r11d +// CHECK: encoding: [0x41,0x0f,0xc7,0xfb] + rdseed %r11d + +// CHECK: rdseedq %r11 +// CHECK: encoding: [0x49,0x0f,0xc7,0xfb] + rdseed %r11 diff --git a/test/MC/X86/x86_64-rtm-encoding.s b/test/MC/X86/x86_64-rtm-encoding.s index 44d6bacb7f..d9975d67b3 100644 --- a/test/MC/X86/x86_64-rtm-encoding.s +++ b/test/MC/X86/x86_64-rtm-encoding.s @@ -8,6 +8,10 @@ // CHECK: encoding: [0x0f,0x01,0xd5] xend +// CHECK: xtest +// CHECK: encoding: [0x0f,0x01,0xd6] + xtest + // CHECK: xabort // CHECK: encoding: [0xc6,0xf8,0x0d] xabort $13 diff --git a/test/Makefile b/test/Makefile index b7b873c813..b47695100a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -60,6 +60,11 @@ endif ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/Makefile && echo OK), OK) LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test + +# Force creation of Clang Tools' lit.site.cfg. +clang-tools-site-cfg: FORCE + $(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg +extra-site-cfgs:: clang-tools-site-cfg endif endif endif @@ -75,8 +80,9 @@ ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ; else # !AuroraUX # Newer versions of python try to allocate an insane amount of address space for # its thread-local storage, don't set a limit here. +# When -v is not used, then -s has to be used to limit the stack size. # FIXME: Those limits should be enforced by lit instead of globally. -ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; +ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -s 8192 ; endif # AuroraUX endif # SunOS diff --git a/test/Object/readobj-elf-versioning.test b/test/Object/readobj-elf-versioning.test index 0906f344e2..be1a39d9c4 100644 --- a/test/Object/readobj-elf-versioning.test +++ b/test/Object/readobj-elf-versioning.test @@ -7,9 +7,9 @@ RUN: | FileCheck %s -check-prefix ELF RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \ RUN: | FileCheck %s -check-prefix ELF64 -ELF: foo@@VER2 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global -ELF: foo@VER1 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global -ELF: unversioned_define FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: foo@@VER2 FUNC .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: foo@VER1 FUNC .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: unversioned_define FUNC .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global ELF32: puts@GLIBC_2.0 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global ELF64: puts@GLIBC_2.2.5 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test index 2c0b54dca4..548bd3801e 100644 --- a/test/Object/readobj-shared-object.test +++ b/test/Object/readobj-shared-object.test @@ -19,37 +19,37 @@ ELF32:Address Size: 32 bits ELF32:Load Name : libfoo.so ELF:Symbols: -ELF: Name Type Address Size FileOffset Flags -ELF: .dynsym DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .dynstr DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .text DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .eh_frame DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .tdata DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .dynamic DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .got.plt DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .data DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: .bss DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific -ELF: shared.ll FILE {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute,formatspecific -ELF: local_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} -ELF: _GLOBAL_OFFSET_TABLE_ DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute -ELF: _DYNAMIC DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute -ELF: common_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global -ELF: tls_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal -ELF: defined_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global -ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute -ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute -ELF: global_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global -ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute +ELF: Name Type Section Address Size FileOffset Flags +ELF: .dynsym DBG .dynsym {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .dynstr DBG .dynstr {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .text DBG .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .eh_frame DBG .eh_frame {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .tdata DBG .tdata {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .dynamic DBG .dynamic {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .got.plt DBG .got.plt {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .data DBG .data {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: .bss DBG .bss {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific +ELF: shared.ll FILE {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute,formatspecific +ELF: local_func FUNC .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} +ELF: _GLOBAL_OFFSET_TABLE_ DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute +ELF: _DYNAMIC DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute +ELF: common_sym DATA .bss {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: tls_sym DATA .tdata {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal +ELF: defined_sym DATA .data {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute +ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute +ELF: global_func FUNC .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute ELF: Total: 21 ELF:Dynamic Symbols: -ELF: Name Type Address Size FileOffset Flags -ELF: common_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global -ELF: tls_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal -ELF: defined_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: Name Type Section Address Size FileOffset Flags +ELF: common_sym DATA .bss {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: tls_sym DATA .tdata {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal +ELF: defined_sym DATA .data {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute -ELF: global_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global +ELF: global_func FUNC .text {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute ELF: Total: {{[0-9a-f]+}} diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td index 40399a48ee..14d616b521 100644 --- a/test/TableGen/Dag.td +++ b/test/TableGen/Dag.td @@ -70,3 +70,15 @@ def VAL4 : bar<foo2, somedef2>; // CHECK-NEXT: dag Dag3 = (somedef2 2); // CHECK-NEXT: NAME = ? // CHECK-NEXT: } + +def VAL5 : bar<foo2, somedef2> { + // Named operands. + let Dag1 = (somedef1 1:$name1); + + // Name, no node. + let Dag2 = (somedef2 $name2, $name3); +} + +// CHECK: def VAL5 { +// CHECK-NEXT: dag Dag1 = (somedef1 1:$name1); +// CHECK-NEXT: dag Dag2 = (somedef2 ?:$name2, ?:$name3); diff --git a/test/Transforms/ArgumentPromotion/crash.ll b/test/Transforms/ArgumentPromotion/crash.ll index f70d8de60e..5e1a0370db 100644 --- a/test/Transforms/ArgumentPromotion/crash.ll +++ b/test/Transforms/ArgumentPromotion/crash.ll @@ -1,7 +1,5 @@ -; rdar://7879828 ; RUN: opt -inline -argpromotion < %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" +; rdar://7879828 define void @foo() { invoke void @foo2() @@ -11,6 +9,8 @@ if.end432: unreachable for.end520: + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup unreachable } @@ -57,3 +57,5 @@ init: %4 = call i32 @"clay_assign(Chain, Chain)"(%0* %3, %0* %1) ret i32 0 } + +declare i32 @__gxx_personality_v0(...) diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll index 59eb4588bd..24448b7009 100644 --- a/test/Transforms/DeadArgElim/dbginfo.ll +++ b/test/Transforms/DeadArgElim/dbginfo.ll @@ -36,17 +36,17 @@ entry: !llvm.dbg.cu = !{!0} -!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus] +!0 = metadata !{i32 786449, i32 4, metadata !6, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus] !1 = metadata !{i32 0} !3 = metadata !{metadata !5, metadata !8, metadata !9} -!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run] -!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786478, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run] +!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] -!8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg] +!8 = metadata !{i32 786478, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg] ; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz -!9 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg] +!9 = metadata !{i32 786478, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg] ; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv @@ -60,3 +60,4 @@ entry: !17 = metadata !{i32 5, i32 25, metadata !18, null} !18 = metadata !{i32 786443, metadata !8, i32 5, i32 23, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc] !19 = metadata !{i32 5, i32 30, metadata !18, null} +!20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"} diff --git a/test/Transforms/EarlyCSE/floatingpoint.ll b/test/Transforms/EarlyCSE/floatingpoint.ll new file mode 100644 index 0000000000..2abecd74b6 --- /dev/null +++ b/test/Transforms/EarlyCSE/floatingpoint.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -S -early-cse | FileCheck %s + +; Ensure we don't simplify away additions vectors of +0.0's (same as scalars). +define <4 x float> @fV( <4 x float> %a) { + ; CHECK: %b = fadd <4 x float> %a, zeroinitializer + %b = fadd <4 x float> %a, <float 0.0,float 0.0,float 0.0,float 0.0> + ret <4 x float> %b +} + +define <4 x float> @fW( <4 x float> %a) { + ; CHECK: ret <4 x float> %a + %b = fadd <4 x float> %a, <float -0.0,float -0.0,float -0.0,float -0.0> + ret <4 x float> %b +} diff --git a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll index 16791e20f4..d414b73524 100644 --- a/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll +++ b/test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -simplify-libcalls -S | FileCheck %s +; RUN: opt < %s -functionattrs -S | FileCheck %s ; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) #0 declare i8* @fopen(i8*, i8*) diff --git a/test/Transforms/FunctionAttrs/annotate-1.ll b/test/Transforms/FunctionAttrs/annotate-1.ll new file mode 100644 index 0000000000..ae77380acc --- /dev/null +++ b/test/Transforms/FunctionAttrs/annotate-1.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -functionattrs -S | FileCheck %s + +declare i8* @fopen(i8*, i8*) +; CHECK: declare noalias i8* @fopen(i8* nocapture, i8* nocapture) [[G0:#[0-9]]] + +declare i8 @strlen(i8*) +; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]] + +declare i32* @realloc(i32*, i32) +; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]] + +; Test deliberately wrong declaration + +declare i32 @strcpy(...) +; CHECK: declare i32 @strcpy(...) + +; CHECK: attributes [[G0]] = { nounwind } +; CHECK: attributes [[G1]] = { nounwind readonly } diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll new file mode 100644 index 0000000000..d1bce728e0 --- /dev/null +++ b/test/Transforms/GCOVProfiling/linkagename.ll @@ -0,0 +1,27 @@ +; RUN: echo '!9 = metadata !{metadata !"%T/linkagename.ll", metadata !0}' > %t1 +; RUN: cat %s %t1 > %t2 +; RUN: opt -insert-gcov-profiling -disable-output < %t2 +; RUN: grep _Z3foov %T/linkagename.gcno +; RUN: rm %T/linkagename.gcno + +; REQUIRES: shell + +define void @_Z3foov() { +entry: + ret void, !dbg !8 +} + +!llvm.dbg.cu = !{!0} +!llvm.gcov = !{!9} + +!0 = metadata !{i32 786449, i32 4, metadata !1, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus] +!1 = metadata !{i32 786473, metadata !2} ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc] +!2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"} +!3 = metadata !{i32 0} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!6 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{null} +!8 = metadata !{i32 1, i32 0, metadata !5, null} + + diff --git a/test/Transforms/GCOVProfiling/lit.local.cfg b/test/Transforms/GCOVProfiling/lit.local.cfg new file mode 100644 index 0000000000..19eebc0ac7 --- /dev/null +++ b/test/Transforms/GCOVProfiling/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll new file mode 100644 index 0000000000..d6d0f3314c --- /dev/null +++ b/test/Transforms/GCOVProfiling/version.ll @@ -0,0 +1,29 @@ +; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1 +; RUN: cat %s %t1 > %t2 +; RUN: opt -insert-gcov-profiling -disable-output < %t2 +; RUN: head -c12 %T/version.gcno | grep '^oncg\*204MVLL$' +; RUN: rm %T/version.gcno +; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2 +; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2 +; RUN: head -c12 %T/version.gcno | grep '^oncg\*704MVLL$' +; RUN: rm %T/version.gcno + +define void @test() { + ret void, !dbg !8 +} + +; REQUIRES: shell + +!llvm.gcov = !{!9} +!llvm.dbg.cu = !{!0} + +!0 = metadata !{metadata !"./version", metadata !1} +!1 = metadata !{i32 786449, i32 0, i32 4, metadata !2, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus] +!2 = metadata !{i32 786473, metadata !"version", metadata !"/usr/local/google/home/nlewycky"} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 0} +!4 = metadata !{metadata !5} +!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"test", metadata !"test", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test] +!6 = metadata !{i32 786473, metadata !"<stdin>", metadata !"."} ; [ DW_TAG_file_type ] +!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!8 = metadata !{i32 1, i32 0, metadata !5, null} +;; !9 is added through the echo line at the top. diff --git a/test/Transforms/GVN/Stats/lit.local.cfg b/test/Transforms/GVN/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/GVN/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/GVN/Stats/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll index afcb7fe3bb..c2eeed56ff 100644 --- a/test/Transforms/GVN/Stats/nonescaping-malloc.ll +++ b/test/Transforms/GVN/nonescaping-malloc.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | grep "Number of loads deleted" ; rdar://7363102 diff --git a/test/Transforms/GlobalOpt/Stats/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll index 0f3efa09a1..e71aed9e05 100644 --- a/test/Transforms/GlobalOpt/Stats/2009-03-05-dbg.ll +++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | grep "1 globalopt - Number of global vars shrunk to booleans" @Stop = internal global i32 0 ; <i32*> [#uses=3] diff --git a/test/Transforms/GlobalOpt/Stats/lit.local.cfg b/test/Transforms/GlobalOpt/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/GlobalOpt/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/IndVarSimplify/Stats/lit.local.cfg b/test/Transforms/IndVarSimplify/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/IndVarSimplify/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/IndVarSimplify/dont-recompute.ll b/test/Transforms/IndVarSimplify/dont-recompute.ll new file mode 100644 index 0000000000..d37b0e21f8 --- /dev/null +++ b/test/Transforms/IndVarSimplify/dont-recompute.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -indvars -S | FileCheck %s + +; This tests that the IV is not recomputed outside of the loop when it is known +; to be computed by the loop and used in the loop any way. In the example below +; although a's value can be computed outside of the loop, there is no benefit +; in doing so as it has to be computed by the loop anyway. +; +; extern void func(unsigned val); +; +; void test(unsigned m) +; { +; unsigned a = 0; +; +; for (int i=0; i<186; i++) { +; a += m; +; func(a); +; } +; +; func(a); +; } + +declare void @func(i32) + +; CHECK: @test +define void @test(i32 %m) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %add = add i32 %a.05, %m +; CHECK: tail call void @func(i32 %add) + tail call void @func(i32 %add) + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 186 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body +; CHECK: for.end: +; CHECK-NOT: mul i32 %m, 186 +; CHECK:%add.lcssa = phi i32 [ %add, %for.body ] +; CHECK-NEXT: tail call void @func(i32 %add.lcssa) + tail call void @func(i32 %add) + ret void +} + +; CHECK: @test2 +define i32 @test2(i32 %m) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %add = add i32 %a.05, %m +; CHECK: tail call void @func(i32 %add) + tail call void @func(i32 %add) + %inc = add nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 186 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body +; CHECK: for.end: +; CHECK-NOT: mul i32 %m, 186 +; CHECK:%add.lcssa = phi i32 [ %add, %for.body ] +; CHECK-NEXT: ret i32 %add.lcssa + ret i32 %add +} diff --git a/test/Transforms/IndVarSimplify/Stats/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll index 52c9e5c3ff..dc36b99482 100644 --- a/test/Transforms/IndVarSimplify/Stats/phi-uses-value-multiple-times.ll +++ b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -indvars -disable-output -stats -info-output-file - | FileCheck %s ; Check that IndVarSimplify is not creating unnecessary canonical IVs ; that will never be used. diff --git a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll index 5ced3b8e8d..b8ca56050d 100644 --- a/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll +++ b/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll @@ -3,10 +3,15 @@ define i32 @main() { entry: invoke void @__main( ) - to label %LongJmpBlkPre unwind label %LongJmpBlkPre + to label %LongJmpBlkPost unwind label %LongJmpBlkPre -LongJmpBlkPre: ; preds = %entry, %entry +LongJmpBlkPost: + ret i32 0 + +LongJmpBlkPre: %i.3 = phi i32 [ 0, %entry ], [ 0, %entry ] ; <i32> [#uses=0] + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup ret i32 0 } @@ -14,3 +19,4 @@ define void @__main() { ret void } +declare i32 @__gxx_personality_v0(...) diff --git a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll index 1bd55299a9..43bdd309c9 100644 --- a/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll +++ b/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll @@ -13,6 +13,8 @@ LJDecisionBB: ; preds = %else br label %else RethrowExcept: ; preds = %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup ret i32 0 } @@ -20,4 +22,4 @@ define void @__main() { ret void } - +declare i32 @__gxx_personality_v0(...) diff --git a/test/Transforms/Inline/Stats/lit.local.cfg b/test/Transforms/Inline/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/Inline/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/Inline/Stats/delete-call.ll b/test/Transforms/Inline/delete-call.ll index 0afd2ee4c2..97c52af9e0 100644 --- a/test/Transforms/Inline/Stats/delete-call.ll +++ b/test/Transforms/Inline/delete-call.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s ; CHECK: Number of functions inlined diff --git a/test/Transforms/Inline/inline_invoke.ll b/test/Transforms/Inline/inline_invoke.ll index c53bb5aa17..c3941388f9 100644 --- a/test/Transforms/Inline/inline_invoke.ll +++ b/test/Transforms/Inline/inline_invoke.ll @@ -96,6 +96,7 @@ eh.resume: ; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]]) ; CHECK-NEXT: to label %[[LBL:[^\s]+]] unwind ; CHECK: [[LBL]]: @@ -166,6 +167,7 @@ eh.resume: ; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]]) ; CHECK-NEXT: to label %[[RESUME1:[^\s]+]] unwind ; CHECK: [[RESUME1]]: @@ -185,6 +187,7 @@ eh.resume: ; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]]) ; CHECK-NEXT: to label %[[RESUME2:[^\s]+]] unwind ; CHECK: [[RESUME2]]: @@ -272,6 +275,7 @@ lpad.cont: ; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev( ; CHECK-NEXT: to label %[[L:[^\s]+]] unwind ; CHECK: [[L]]: @@ -318,6 +322,7 @@ terminate: ; CHECK: landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 ; CHECK-NEXT: cleanup ; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) ; CHECK-NEXT: invoke void @_ZN1AD1Ev( ; CHECK-NEXT: to label %[[L:[^\s]+]] unwind ; CHECK: [[L]]: diff --git a/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll new file mode 100644 index 0000000000..b66495d9cb --- /dev/null +++ b/test/Transforms/InstCombine/2009-02-11-NotInitialized.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -inline -instcombine -functionattrs | llvm-dis +; +; Check that nocapture attributes are added when run after an SCC pass. +; PR3520 + +define i32 @use(i8* %x) nounwind readonly { +; CHECK: @use(i8* nocapture %x) + %1 = tail call i64 @strlen(i8* %x) nounwind readonly + %2 = trunc i64 %1 to i32 + ret i32 %2 +} + +declare i64 @strlen(i8*) nounwind readonly +; CHECK: declare i64 @strlen(i8* nocapture) nounwind readonly diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll new file mode 100644 index 0000000000..4ded581a14 --- /dev/null +++ b/test/Transforms/InstCombine/bitcast-bigendian.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +; These tests are extracted from bitcast.ll. +; Verify that they also work correctly on big-endian targets. + +define float @test2(<2 x float> %A, <2 x i32> %B) { + %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2] + %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1] + %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1] + + %tmp = bitcast <2 x i32> %B to i64 + %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1] + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test2 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1 +; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float> +; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 1 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + +define float @test3(<2 x float> %A, <2 x i64> %B) { + %tmp28 = bitcast <2 x float> %A to i64 + %tmp29 = lshr i64 %tmp28, 32 + %tmp23 = trunc i64 %tmp29 to i32 + %tmp24 = bitcast i32 %tmp23 to float + + %tmp = bitcast <2 x i64> %B to i128 + %tmp1 = lshr i128 %tmp, 64 + %tmp2 = trunc i128 %tmp1 to i32 + %tmp4 = bitcast i32 %tmp2 to float + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test3 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0 +; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float> +; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 1 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} + diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll index f27ab278da..cdbcd86511 100644 --- a/test/Transforms/InstCombine/debuginfo.ll +++ b/test/Transforms/InstCombine/debuginfo.ll @@ -28,14 +28,12 @@ entry: ret i8* %call, !dbg !21 } -!25 = metadata !{metadata !0, metadata !7, metadata !9} !llvm.dbg.cu = !{!3} -!24 = metadata !{metadata !1} !0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"string.h", metadata !"Game", metadata !3} ; [ DW_TAG_file_type ] -!3 = metadata !{i32 786449, i32 0, i32 12, metadata !"bits.c", metadata !"Game", metadata !"clang version 3.0 (trunk 127710)", i1 true, i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", metadata !2, i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 786449, i32 0, i32 12, metadata !26, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, null, null, metadata !24, null, null} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6} !6 = metadata !{i32 786447, metadata !3, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] @@ -56,3 +54,8 @@ entry: !21 = metadata !{i32 80, i32 3, metadata !22, null} !22 = metadata !{i32 786443, metadata !23, i32 80, i32 3, metadata !2, i32 7} ; [ DW_TAG_lexical_block ] !23 = metadata !{i32 786443, metadata !1, i32 79, i32 1, metadata !2, i32 6} ; [ DW_TAG_lexical_block ] +!24 = metadata !{metadata !1} +!25 = metadata !{metadata !0, metadata !7, metadata !9} +!26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ] +!27 = metadata !{metadata !"string.h", metadata !"Game"} +!28 = metadata !{metadata !"bits.c", metadata !"Game"} diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll index 3e32a2e4dd..edcbcc71df 100644 --- a/test/Transforms/InstCombine/fast-math.ll +++ b/test/Transforms/InstCombine/fast-math.ll @@ -130,6 +130,16 @@ define double @fail2(double %f1, double %f2) { ; CHECK: ret } +; c1 * x - x => (c1 - 1.0) * x +define float @fold13(float %x) { + %mul = fmul fast float %x, 7.000000e+00 + %sub = fsub fast float %mul, %x + ret float %sub +; CHECK: fold13 +; CHECK: fmul fast float %x, 6.000000e+00 +; CHECK: ret +} + ; ========================================================================= ; ; Testing-cases about fmul begin @@ -350,3 +360,108 @@ define float @fdiv9(float %x) { ; CHECK: @fdiv9 ; CHECK: fmul fast float %x, 5.000000e+00 } + +; ========================================================================= +; +; Testing-cases about factorization +; +; ========================================================================= +; x*z + y*z => (x+y) * z +define float @fact_mul1(float %x, float %y, float %z) { + %t1 = fmul fast float %x, %z + %t2 = fmul fast float %y, %z + %t3 = fadd fast float %t1, %t2 + ret float %t3 +; CHECK: @fact_mul1 +; CHECK: fmul fast float %1, %z +} + +; z*x + y*z => (x+y) * z +define float @fact_mul2(float %x, float %y, float %z) { + %t1 = fmul fast float %z, %x + %t2 = fmul fast float %y, %z + %t3 = fsub fast float %t1, %t2 + ret float %t3 +; CHECK: @fact_mul2 +; CHECK: fmul fast float %1, %z +} + +; z*x - z*y => (x-y) * z +define float @fact_mul3(float %x, float %y, float %z) { + %t2 = fmul fast float %z, %y + %t1 = fmul fast float %z, %x + %t3 = fsub fast float %t1, %t2 + ret float %t3 +; CHECK: @fact_mul3 +; CHECK: fmul fast float %1, %z +} + +; x*z - z*y => (x-y) * z +define float @fact_mul4(float %x, float %y, float %z) { + %t1 = fmul fast float %x, %z + %t2 = fmul fast float %z, %y + %t3 = fsub fast float %t1, %t2 + ret float %t3 +; CHECK: @fact_mul4 +; CHECK: fmul fast float %1, %z +} + +; x/y + x/z, no xform +define float @fact_div1(float %x, float %y, float %z) { + %t1 = fdiv fast float %x, %y + %t2 = fdiv fast float %x, %z + %t3 = fadd fast float %t1, %t2 + ret float %t3 +; CHECK: fact_div1 +; CHECK: fadd fast float %t1, %t2 +} + +; x/y + z/x; no xform +define float @fact_div2(float %x, float %y, float %z) { + %t1 = fdiv fast float %x, %y + %t2 = fdiv fast float %z, %x + %t3 = fadd fast float %t1, %t2 + ret float %t3 +; CHECK: fact_div2 +; CHECK: fadd fast float %t1, %t2 +} + +; y/x + z/x => (y+z)/x +define float @fact_div3(float %x, float %y, float %z) { + %t1 = fdiv fast float %y, %x + %t2 = fdiv fast float %z, %x + %t3 = fadd fast float %t1, %t2 + ret float %t3 +; CHECK: fact_div3 +; CHECK: fdiv fast float %1, %x +} + +; y/x - z/x => (y-z)/x +define float @fact_div4(float %x, float %y, float %z) { + %t1 = fdiv fast float %y, %x + %t2 = fdiv fast float %z, %x + %t3 = fsub fast float %t1, %t2 + ret float %t3 +; CHECK: fact_div4 +; CHECK: fdiv fast float %1, %x +} + +; y/x - z/x => (y-z)/x is disabled if y-z is denormal. +define float @fact_div5(float %x) { + %t1 = fdiv fast float 0x3810000000000000, %x + %t2 = fdiv fast float 0x3800000000000000, %x + %t3 = fadd fast float %t1, %t2 + ret float %t3 +; CHECK: fact_div5 +; CHECK: fdiv fast float 0x3818000000000000, %x +} + +; y/x - z/x => (y-z)/x is disabled if y-z is denormal. +define float @fact_div6(float %x) { + %t1 = fdiv fast float 0x3810000000000000, %x + %t2 = fdiv fast float 0x3800000000000000, %x + %t3 = fsub fast float %t1, %t2 + ret float %t3 +; CHECK: fact_div6 +; CHECK: %t3 = fsub fast float %t1, %t2 +} diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll index 331eb3f21a..446c0e01dc 100644 --- a/test/Transforms/InstCombine/icmp.ll +++ b/test/Transforms/InstCombine/icmp.ll @@ -744,3 +744,145 @@ define i1 @icmp_shl24(i32 %x) { %cmp = icmp slt i32 %shl, 603979776 ret i1 %cmp } + +; If the (shl x, C) preserved the sign and this is a sign test, +; compare the LHS operand instead +; CHECK: @icmp_shl_nsw_sgt +; CHECK-NEXT: icmp sgt i32 %x, 0 +define i1 @icmp_shl_nsw_sgt(i32 %x) { + %shl = shl nsw i32 %x, 21 + %cmp = icmp sgt i32 %shl, 0 + ret i1 %cmp +} + +; CHECK: @icmp_shl_nsw_sge0 +; CHECK-NEXT: icmp sgt i32 %x, -1 +define i1 @icmp_shl_nsw_sge0(i32 %x) { + %shl = shl nsw i32 %x, 21 + %cmp = icmp sge i32 %shl, 0 + ret i1 %cmp +} + +; CHECK: @icmp_shl_nsw_sge1 +; CHECK-NEXT: icmp sgt i32 %x, 0 +define i1 @icmp_shl_nsw_sge1(i32 %x) { + %shl = shl nsw i32 %x, 21 + %cmp = icmp sge i32 %shl, 1 + ret i1 %cmp +} + +; Checks for icmp (eq|ne) (shl x, C), 0 +; CHECK: @icmp_shl_nsw_eq +; CHECK-NEXT: icmp eq i32 %x, 0 +define i1 @icmp_shl_nsw_eq(i32 %x) { + %mul = shl nsw i32 %x, 5 + %cmp = icmp eq i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_shl_eq +; CHECK-NOT: icmp eq i32 %mul, 0 +define i1 @icmp_shl_eq(i32 %x) { + %mul = shl i32 %x, 5 + %cmp = icmp eq i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_shl_nsw_ne +; CHECK-NEXT: icmp ne i32 %x, 0 +define i1 @icmp_shl_nsw_ne(i32 %x) { + %mul = shl nsw i32 %x, 7 + %cmp = icmp ne i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_shl_ne +; CHECK-NOT: icmp ne i32 %x, 0 +define i1 @icmp_shl_ne(i32 %x) { + %mul = shl i32 %x, 7 + %cmp = icmp ne i32 %mul, 0 + ret i1 %cmp +} + +; If the (mul x, C) preserved the sign and this is sign test, +; compare the LHS operand instead +; CHECK: @icmp_mul_nsw +; CHECK-NEXT: icmp sgt i32 %x, 0 +define i1 @icmp_mul_nsw(i32 %x) { + %mul = mul nsw i32 %x, 12 + %cmp = icmp sgt i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_mul_nsw1 +; CHECK-NEXT: icmp slt i32 %x, 0 +define i1 @icmp_mul_nsw1(i32 %x) { + %mul = mul nsw i32 %x, 12 + %cmp = icmp sle i32 %mul, -1 + ret i1 %cmp +} + +; CHECK: @icmp_mul_nsw_neg +; CHECK-NEXT: icmp slt i32 %x, 1 +define i1 @icmp_mul_nsw_neg(i32 %x) { + %mul = mul nsw i32 %x, -12 + %cmp = icmp sge i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_mul_nsw_neg1 +; CHECK-NEXT: icmp slt i32 %x, 0 +define i1 @icmp_mul_nsw_neg1(i32 %x) { + %mul = mul nsw i32 %x, -12 + %cmp = icmp sge i32 %mul, 1 + ret i1 %cmp +} + +; CHECK: @icmp_mul_nsw_0 +; CHECK-NOT: icmp sgt i32 %x, 0 +define i1 @icmp_mul_nsw_0(i32 %x) { + %mul = mul nsw i32 %x, 0 + %cmp = icmp sgt i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_mul +; CHECK-NEXT: %mul = mul i32 %x, -12 +define i1 @icmp_mul(i32 %x) { + %mul = mul i32 %x, -12 + %cmp = icmp sge i32 %mul, 0 + ret i1 %cmp +} + +; Checks for icmp (eq|ne) (mul x, C), 0 +; CHECK: @icmp_mul_neq0 +; CHECK-NEXT: icmp ne i32 %x, 0 +define i1 @icmp_mul_neq0(i32 %x) { + %mul = mul nsw i32 %x, -12 + %cmp = icmp ne i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_mul_eq0 +; CHECK-NEXT: icmp eq i32 %x, 0 +define i1 @icmp_mul_eq0(i32 %x) { + %mul = mul nsw i32 %x, 12 + %cmp = icmp eq i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_mul0_eq0 +; CHECK-NEXT: ret i1 true +define i1 @icmp_mul0_eq0(i32 %x) { + %mul = mul i32 %x, 0 + %cmp = icmp eq i32 %mul, 0 + ret i1 %cmp +} + +; CHECK: @icmp_mul0_ne0 +; CHECK-NEXT: ret i1 false +define i1 @icmp_mul0_ne0(i32 %x) { + %mul = mul i32 %x, 0 + %cmp = icmp ne i32 %mul, 0 + ret i1 %cmp +} diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll index 5cafb7787e..d88188e410 100644 --- a/test/Transforms/InstCombine/load-cmp.ll +++ b/test/Transforms/InstCombine/load-cmp.ll @@ -47,6 +47,18 @@ define i1 @test4(i32 %X) { ; CHECK-NEXT: ret i1 %R } +define i1 @test4_i16(i16 %X) { + %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i16 %X + %Q = load i16* %P + %R = icmp sle i16 %Q, 73 + ret i1 %R +; CHECK: @test4_i16 +; CHECK-NEXT: lshr i16 933, %X +; CHECK-NEXT: and i16 {{.*}}, 1 +; CHECK-NEXT: %R = icmp ne i16 {{.*}}, 0 +; CHECK-NEXT: ret i1 %R +} + define i1 @test5(i32 %X) { %P = getelementptr inbounds [10 x i16]* @G16, i32 0, i32 %X %Q = load i16* %P diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll index 16c0c67970..7139972fe0 100644 --- a/test/Transforms/InstCombine/strto-1.ll +++ b/test/Transforms/InstCombine/strto-1.ll @@ -1,29 +1,29 @@ ; Test that the strto* library call simplifiers works correctly. ; -; RUN: opt < %s -instcombine -S | FileCheck %s +; RUN: opt < %s -instcombine -functionattrs -S | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" declare i64 @strtol(i8* %s, i8** %endptr, i32 %base) -; CHECK: declare i64 @strtol(i8*, i8**, i32) +; CHECK: declare i64 @strtol(i8*, i8** nocapture, i32) declare double @strtod(i8* %s, i8** %endptr, i32 %base) -; CHECK: declare double @strtod(i8*, i8**, i32) +; CHECK: declare double @strtod(i8*, i8** nocapture, i32) declare float @strtof(i8* %s, i8** %endptr, i32 %base) -; CHECK: declare float @strtof(i8*, i8**, i32) +; CHECK: declare float @strtof(i8*, i8** nocapture, i32) declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base) -; CHECK: declare i64 @strtoul(i8*, i8**, i32) +; CHECK: declare i64 @strtoul(i8*, i8** nocapture, i32) declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base) -; CHECK: declare i64 @strtoll(i8*, i8**, i32) +; CHECK: declare i64 @strtoll(i8*, i8** nocapture, i32) declare double @strtold(i8* %s, i8** %endptr) -; CHECK: declare double @strtold(i8*, i8**) +; CHECK: declare double @strtold(i8*, i8** nocapture) declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base) -; CHECK: declare i64 @strtoull(i8*, i8**, i32) +; CHECK: declare i64 @strtoull(i8*, i8** nocapture, i32) define void @test_simplify1(i8* %x, i8** %endptr) { ; CHECK: @test_simplify1 diff --git a/test/Transforms/InstCombine/vector-type.ll b/test/Transforms/InstCombine/vector-type.ll new file mode 100644 index 0000000000..59a4bdd19e --- /dev/null +++ b/test/Transforms/InstCombine/vector-type.ll @@ -0,0 +1,15 @@ +; The code in InstCombiner::FoldSelectOpOp was calling +; Type::getVectorNumElements without checking first if the type was a vector. + +; RUN: opt < %s -instcombine -S + +define i32 @vselect1(i32 %a.coerce, i32 %b.coerce, i32 %c.coerce) { +entry: + %0 = bitcast i32 %a.coerce to <2 x i16> + %1 = bitcast i32 %b.coerce to <2 x i16> + %2 = bitcast i32 %c.coerce to <2 x i16> + %cmp = icmp sge <2 x i16> %2, zeroinitializer + %or = select <2 x i1> %cmp, <2 x i16> %0, <2 x i16> %1 + %3 = bitcast <2 x i16> %or to i32 + ret i32 %3 +} diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll index 0ecfb1f8d2..b764c761cf 100644 --- a/test/Transforms/InstSimplify/compare.ll +++ b/test/Transforms/InstSimplify/compare.ll @@ -682,3 +682,14 @@ define zeroext i1 @external_compare(i32* noalias %x) { ; CHECK: external_compare ; CHECK: ret i1 %cmp } + +define i1 @alloca_gep(i64 %a, i64 %b) { +; CHECK: @alloca_gep +; We can prove this GEP is non-null because it is inbounds and the pointer +; is non-null. + %strs = alloca [1000 x [1001 x i8]], align 16 + %x = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %a, i64 %b + %cmp = icmp eq i8* %x, null + ret i1 %cmp +; CHECK-NEXT: ret i1 false +} diff --git a/test/Transforms/LICM/Stats/lit.local.cfg b/test/Transforms/LICM/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/LICM/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/LICM/Stats/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll index f9fc551df3..1ba94d6b48 100644 --- a/test/Transforms/LICM/Stats/hoist-invariant-load.ll +++ b/test/Transforms/LICM/hoist-invariant-load.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -licm -stats -S 2>&1 | grep "1 licm" @"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1 diff --git a/test/Transforms/LoopUnroll/Stats/lit.local.cfg b/test/Transforms/LoopUnroll/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/LoopUnroll/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/LoopUnroll/Stats/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll index 55cf22373e..aa928ccc60 100644 --- a/test/Transforms/LoopUnroll/Stats/runtime-loop3.ll +++ b/test/Transforms/LoopUnroll/runtime-loop3.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS ; Test that nested loops can be unrolled. We need to increase threshold to do it diff --git a/test/Transforms/LoopUnswitch/Stats/2008-11-03-Invariant.ll b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll index 9d73d31d50..31dba79be1 100644 --- a/test/Transforms/LoopUnswitch/Stats/2008-11-03-Invariant.ll +++ b/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -loop-unswitch -stats -disable-output 2>&1 | grep "1 loop-unswitch - Number of branches unswitched" | count 1 ; PR 3170 define i32 @a(i32 %x, i32 %y) nounwind { diff --git a/test/Transforms/LoopUnswitch/Stats/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll index bde52da87a..a8608b8772 100644 --- a/test/Transforms/LoopUnswitch/Stats/2011-11-18-SimpleSwitch.ll +++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s ; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info < %s | FileCheck %s diff --git a/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll index c3bf5966ad..686cedbbc5 100644 --- a/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches-Threshold.ll +++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s ; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info < %s | FileCheck %s diff --git a/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll index 95303337da..3ba9fc2f5c 100644 --- a/test/Transforms/LoopUnswitch/Stats/2011-11-18-TwoSwitches.ll +++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s ; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info < %s | FileCheck %s diff --git a/test/Transforms/LoopUnswitch/Stats/lit.local.cfg b/test/Transforms/LoopUnswitch/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/LoopUnswitch/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/LoopUnswitch/Stats/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll index f3fba642f8..8261e38937 100644 --- a/test/Transforms/LoopUnswitch/Stats/infinite-loop.ll +++ b/test/Transforms/LoopUnswitch/infinite-loop.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s ; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s ; PR5373 diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 23d9233544..760d28deaf 100644 --- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -33,11 +33,10 @@ define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) noun .lr.ph: ; preds = %0, %.lr.ph %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ] - %2 = add nsw i64 %indvars.iv, 3 - %3 = trunc i64 %2 to i32 - %4 = sitofp i32 %3 to float - %5 = getelementptr inbounds float* %B, i64 %indvars.iv - store float %4, float* %5, align 4 + %add = add nsw i64 %indvars.iv, 3 + %tofp = sitofp i64 %add to float + %gep = getelementptr inbounds float* %B, i64 %indvars.iv + store float %tofp, float* %gep, align 4 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %n diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll new file mode 100644 index 0000000000..3b3a7875ab --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -loop-vectorize -mcpu=core2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s +; REQUIRES: asserts +; Make sure we use the right select kind when querying select costs. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 + +; CHECK: Checking a loop in "scalarselect" +define void @scalarselect(i1 %cond) { + br label %1 + +; <label>:1 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + +; A scalar select has a cost of 1 on core2 +; CHECK: cost of 1 for VF 2 {{.*}} select i1 %cond, i32 %6, i32 0 + + %sel = select i1 %cond, i32 %6, i32 zeroinitializer + store i32 %sel, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %8, label %1 + +; <label>:8 + ret void +} + +; CHECK: Checking a loop in "vectorselect" +define void @vectorselect(i1 %cond) { + br label %1 + +; <label>:1 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv + %3 = load i32* %2, align 4 + %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv + %5 = load i32* %4, align 4 + %6 = add nsw i32 %5, %3 + %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %8 = icmp ult i64 %indvars.iv, 8 + +; A vector select has a cost of 4 on core2 +; CHECK: cost of 4 for VF 2 {{.*}} select i1 %8, i32 %6, i32 0 + + %sel = select i1 %8, i32 %6, i32 zeroinitializer + store i32 %sel, i32* %7, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %9, label %1 + +; <label>:9 + ret void +} + diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll index d70814fcb6..c0eaaa4015 100644 --- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll +++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll @@ -8,8 +8,10 @@ entry: %0 = alloca double ; <double*> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0), !dbg !8 -; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !0) -; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata !9) +; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata ![[IVAR:[0-9]*]]) +; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata ![[JVAR:[0-9]*]]) +; CHECK: ![[IVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [i] +; CHECK: ![[JVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [j] store i32 %i, i32* %i_addr call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9), !dbg !8 store double %j, double* %j_addr @@ -30,9 +32,11 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone +!llvm.dbg.cu = !{!3} + !0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"testfunc.c", metadata !"/tmp", metadata !3} ; [ DW_TAG_file_type ] +!1 = metadata !{i32 786478, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", metadata !2, i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, i32 0, i32 1, metadata !"testfunc.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{metadata !6, metadata !7, metadata !6} @@ -42,4 +46,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] !10 = metadata !{i32 3, i32 0, metadata !11, null} !11 = metadata !{i32 786443, metadata !1, i32 2, i32 0} ; [ DW_TAG_lexical_block ] - +!12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"} diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll index d1a4d8414a..f6119f8bbd 100644 --- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll +++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll @@ -31,8 +31,8 @@ return: ; preds = %entry } !0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"bar.c", metadata !"/tmp/", metadata !3} ; [ DW_TAG_file_type ] +!1 = metadata !{i32 786478, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786449, i32 0, i32 1, metadata !"bar.c", metadata !"/tmp/", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] !4 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ] !5 = metadata !{null, metadata !6} @@ -40,7 +40,7 @@ return: ; preds = %entry !7 = metadata !{i32 8, i32 0, metadata !1, null} !8 = metadata !{i32 9, i32 0, metadata !1, null} !9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ] -!10 = metadata !{i32 786478, i32 0, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 786478, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", metadata !2, i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ] !11 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] !12 = metadata !{null, metadata !6, metadata !13, metadata !14} !13 = metadata !{i32 786468, metadata !2, metadata !"long int", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -50,3 +50,4 @@ return: ; preds = %entry !17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ] !18 = metadata !{i32 5, i32 0, metadata !10, metadata !8} !19 = metadata !{i32 10, i32 0, metadata !1, null} +!20 = metadata !{metadata !"bar.c", metadata !"/tmp/"} diff --git a/test/Transforms/MergeFunc/Stats/lit.local.cfg b/test/Transforms/MergeFunc/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/MergeFunc/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/MergeFunc/Stats/phi-speculation1.ll b/test/Transforms/MergeFunc/phi-speculation1.ll index fd0baffb31..548e5102be 100644 --- a/test/Transforms/MergeFunc/Stats/phi-speculation1.ll +++ b/test/Transforms/MergeFunc/phi-speculation1.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | not grep "functions merged" define i32 @foo1(i32 %x) { diff --git a/test/Transforms/MergeFunc/Stats/phi-speculation2.ll b/test/Transforms/MergeFunc/phi-speculation2.ll index eec8b5c5a9..d42a465d0c 100644 --- a/test/Transforms/MergeFunc/Stats/phi-speculation2.ll +++ b/test/Transforms/MergeFunc/phi-speculation2.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -mergefunc -stats -disable-output 2>&1 | grep "functions merged" define i32 @foo1(i32 %x) { diff --git a/test/Transforms/MergeFunc/Stats/vector.ll b/test/Transforms/MergeFunc/vector.ll index 4af079f8cd..dba5fa349a 100644 --- a/test/Transforms/MergeFunc/Stats/vector.ll +++ b/test/Transforms/MergeFunc/vector.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -mergefunc -stats -disable-output < %s 2>&1 | grep "functions merged" ; This test is checks whether we can merge diff --git a/test/Transforms/MergeFunc/Stats/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll index dc64a0858b..22747224a1 100644 --- a/test/Transforms/MergeFunc/Stats/vectors-and-arrays.ll +++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged ; This used to crash with an assert. diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll new file mode 100644 index 0000000000..4c56b4a3de --- /dev/null +++ b/test/Transforms/ObjCARC/arc-annotations.ll @@ -0,0 +1,307 @@ +; This file consists of various tests which ensure that the objc-arc-annotations +; are working correctly. In the future, I will use this in other lit tests to +; check the data flow analysis of ARC. + +; REQUIRES: asserts +; RUN: opt -S -objc-arc -enable-objc-arc-annotations < %s | FileCheck %s + +declare i8* @objc_retain(i8*) +declare i8* @objc_retainAutoreleasedReturnValue(i8*) +declare void @objc_release(i8*) +declare i8* @objc_autorelease(i8*) +declare i8* @objc_autoreleaseReturnValue(i8*) +declare void @objc_autoreleasePoolPop(i8*) +declare i8* @objc_autoreleasePoolPush() +declare i8* @objc_retainBlock(i8*) + +declare i8* @objc_retainedObject(i8*) +declare i8* @objc_unretainedObject(i8*) +declare i8* @objc_unretainedPointer(i8*) + +declare void @use_pointer(i8*) +declare void @callee() +declare void @callee_fnptr(void ()*) +declare void @invokee() +declare i8* @returner() + +; Simple retain+release pair deletion, with some intervening control +; flow and harmless instructions. + +; CHECK: define void @test0( +; CHECK: entry: +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None) +; CHECK: %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !0, !llvm.arc.annotation.topdown !1 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: t: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use) +; CHECK: store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !2 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: f: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use) +; CHECK: store i32 7, i32* %x, !llvm.arc.annotation.bottomup !2 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: return: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release) +; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !3, !llvm.arc.annotation.topdown !4 +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: } +define void @test0(i32* %x, i1 %p) nounwind { +entry: + %a = bitcast i32* %x to i8* + %0 = call i8* @objc_retain(i8* %a) nounwind + br i1 %p, label %t, label %f + +t: + store i8 3, i8* %a + %b = bitcast i32* %x to float* + store float 2.0, float* %b + br label %return + +f: + store i32 7, i32* %x + br label %return + +return: + %c = bitcast i32* %x to i8* + call void @objc_release(i8* %c) nounwind + ret void +} + +; Like test0 but the release isn't always executed when the retain is, +; so the optimization is not safe. + +; TODO: Make the objc_release's argument be %0. + +; CHECK: define void @test1( +; CHECK: entry: +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None) +; CHECK: %0 = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !5, !llvm.arc.annotation.topdown !6 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: t: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use) +; CHECK: store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !7 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: f: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None) +; CHECK: call void @callee(), !llvm.arc.annotation.topdown !8 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease) +; CHECK: return: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release) +; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !9 +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: alt_return: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: } +define void @test1(i32* %x, i1 %p, i1 %q) nounwind { +entry: + %a = bitcast i32* %x to i8* + %0 = call i8* @objc_retain(i8* %a) nounwind + br i1 %p, label %t, label %f + +t: + store i8 3, i8* %a + %b = bitcast i32* %x to float* + store float 2.0, float* %b + br label %return + +f: + store i32 7, i32* %x + call void @callee() + br i1 %q, label %return, label %alt_return + +return: + %c = bitcast i32* %x to i8* + call void @objc_release(i8* %c) nounwind + ret void + +alt_return: + ret void +} + +; Don't do partial elimination into two different CFG diamonds. + +; CHECK: define void @test1b( +; CHECK: entry: +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None) +; CHECK: %0 = tail call i8* @objc_retain(i8* %x) #0, !llvm.arc.annotation.bottomup !10, !llvm.arc.annotation.topdown !11 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: if.then: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease) +; CHECK: tail call void @callee(), !llvm.arc.annotation.bottomup !12, !llvm.arc.annotation.topdown !13 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease) +; CHECK: if.end: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use) +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Use) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_CanRelease) +; CHECK: if.then3: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_CanRelease) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use) +; CHECK: tail call void @use_pointer(i8* %x), !llvm.arc.annotation.bottomup !14, !llvm.arc.annotation.topdown !15 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_MovableRelease) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use) +; CHECK: if.end5: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_MovableRelease) +; CHECK: tail call void @objc_release(i8* %x) #0, !clang.imprecise_release !16, !llvm.arc.annotation.bottomup !17 +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: } +define void @test1b(i8* %x, i1 %p, i1 %q) { +entry: + tail call i8* @objc_retain(i8* %x) nounwind + br i1 %p, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @callee() + br label %if.end + +if.end: ; preds = %if.then, %entry + br i1 %q, label %if.then3, label %if.end5 + +if.then3: ; preds = %if.end + tail call void @use_pointer(i8* %x) + br label %if.end5 + +if.end5: ; preds = %if.then3, %if.end + tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0 + ret void +} + +; Like test0 but the pointer is passed to an intervening call, +; so the optimization is not safe. + +; CHECK: define void @test2( +; CHECK: entry: +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None) +; CHECK: %e = tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !18, !llvm.arc.annotation.topdown !19 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_CanRelease) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: t: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Use) +; CHECK: store float 2.000000e+00, float* %b, !llvm.arc.annotation.bottomup !20 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: f: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_CanRelease) +; CHECK: call void @use_pointer(i8* %e), !llvm.arc.annotation.bottomup !21, !llvm.arc.annotation.topdown !22 +; CHECK: store float 3.000000e+00, float* %d, !llvm.arc.annotation.bottomup !20, !llvm.arc.annotation.topdown !23 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Use) +; CHECK: return: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Use) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release) +; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !24, !llvm.arc.annotation.topdown !25 +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: } +define void @test2(i32* %x, i1 %p) nounwind { +entry: + %a = bitcast i32* %x to i8* + %e = call i8* @objc_retain(i8* %a) nounwind + br i1 %p, label %t, label %f + +t: + store i8 3, i8* %a + %b = bitcast i32* %x to float* + store float 2.0, float* %b + br label %return + +f: + store i32 7, i32* %x + call void @use_pointer(i8* %e) + %d = bitcast i32* %x to float* + store float 3.0, float* %d + br label %return + +return: + %c = bitcast i32* %x to i8* + call void @objc_release(i8* %c) nounwind + ret void +} + +; Like test0 but the release is in a loop, +; so the optimization is not safe. + +; TODO: For now, assume this can't happen. + +; CHECK: define void @test3( +; CHECK: entry: +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_None) +; CHECK: tail call i8* @objc_retain(i8* %a) #0, !llvm.arc.annotation.bottomup !26, !llvm.arc.annotation.topdown !27 +; CHECK: call void @llvm.arc.annotation.bottomup.bbend(i8** @x, i8** @S_Release) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_Retain) +; CHECK: loop: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_Retain) +; CHECK: call void @llvm.arc.annotation.bottomup.bbstart(i8** @x, i8** @S_Release) +; CHECK: call void @objc_release(i8* %c) #0, !llvm.arc.annotation.bottomup !28, !llvm.arc.annotation.topdown !29 +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: return: +; CHECK: call void @llvm.arc.annotation.topdown.bbstart(i8** @x, i8** @S_None) +; CHECK: call void @llvm.arc.annotation.topdown.bbend(i8** @x, i8** @S_None) +; CHECK: } +define void @test3(i32* %x, i1* %q) nounwind { +entry: + %a = bitcast i32* %x to i8* + %0 = call i8* @objc_retain(i8* %a) nounwind + br label %loop + +loop: + %c = bitcast i32* %x to i8* + call void @objc_release(i8* %c) nounwind + %j = load volatile i1* %q + br i1 %j, label %loop, label %return + +return: + ret void +} + +!0 = metadata !{} + +; CHECK: !0 = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"} +; CHECK: !1 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"} +; CHECK: !2 = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"} +; CHECK: !3 = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"} +; CHECK: !4 = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"} +; CHECK: !5 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_None"} +; CHECK: !6 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Retain"} +; CHECK: !7 = metadata !{metadata !"(test1,%x)", metadata !"S_Release", metadata !"S_Use"} +; CHECK: !8 = metadata !{metadata !"(test1,%x)", metadata !"S_Retain", metadata !"S_CanRelease"} +; CHECK: !9 = metadata !{metadata !"(test1,%x)", metadata !"S_None", metadata !"S_Release"} +; CHECK: !10 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_None"} +; CHECK: !11 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_Retain"} +; CHECK: !12 = metadata !{metadata !"(test1b,%x)", metadata !"S_Use", metadata !"S_CanRelease"} +; CHECK: !13 = metadata !{metadata !"(test1b,%x)", metadata !"S_Retain", metadata !"S_CanRelease"} +; CHECK: !14 = metadata !{metadata !"(test1b,%x)", metadata !"S_MovableRelease", metadata !"S_Use"} +; CHECK: !15 = metadata !{metadata !"(test1b,%x)", metadata !"S_CanRelease", metadata !"S_Use"} +; CHECK: !16 = metadata !{} +; CHECK: !17 = metadata !{metadata !"(test1b,%x)", metadata !"S_None", metadata !"S_MovableRelease"} +; CHECK: !18 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_None"} +; CHECK: !19 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Retain"} +; CHECK: !20 = metadata !{metadata !"(test2,%x)", metadata !"S_Release", metadata !"S_Use"} +; CHECK: !21 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_CanRelease"} +; CHECK: !22 = metadata !{metadata !"(test2,%x)", metadata !"S_Retain", metadata !"S_CanRelease"} +; CHECK: !23 = metadata !{metadata !"(test2,%x)", metadata !"S_CanRelease", metadata !"S_Use"} +; CHECK: !24 = metadata !{metadata !"(test2,%x)", metadata !"S_None", metadata !"S_Release"} +; CHECK: !25 = metadata !{metadata !"(test2,%x)", metadata !"S_Use", metadata !"S_None"} +; CHECK: !26 = metadata !{metadata !"(test3,%x)", metadata !"S_Release", metadata !"S_None"} +; CHECK: !27 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Retain"} +; CHECK: !28 = metadata !{metadata !"(test3,%x)", metadata !"S_None", metadata !"S_Release"} +; CHECK: !29 = metadata !{metadata !"(test3,%x)", metadata !"S_Retain", metadata !"S_None"} + diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll index 4c24ebf865..bf6ccc8a7f 100644 --- a/test/Transforms/ObjCARC/basic.ll +++ b/test/Transforms/ObjCARC/basic.ll @@ -795,10 +795,10 @@ entry: ret void } -; Don't optimize objc_retainBlock. +; Don't optimize objc_retainBlock, but do strength reduce it. ; CHECK: define void @test23b -; CHECK: @objc_retainBlock +; CHECK: @objc_retain ; CHECK: @objc_release ; CHECK: } define void @test23b(i8* %p) { diff --git a/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll new file mode 100644 index 0000000000..4215b5c364 --- /dev/null +++ b/test/Transforms/ObjCARC/clang-arc-used-intrinsic-removed-if-isolated.ll @@ -0,0 +1,16 @@ +; RUN: opt -objc-arc-contract -S < %s | FileCheck %s + +; This file makes sure that clang.arc.used is removed even if no other ARC +; interesting calls are in the module. + +declare void @clang.arc.use(...) nounwind + +; Kill calls to @clang.arc.use(...) +; CHECK: define void @test0( +; CHECK-NOT: clang.arc.use +; CHECK: } +define void @test0(i8* %a, i8* %b) { + call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind + ret void +} + diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll index b6fba59603..0b60683d99 100644 --- a/test/Transforms/ObjCARC/contract.ll +++ b/test/Transforms/ObjCARC/contract.ll @@ -162,4 +162,15 @@ return: ; preds = %if.then, %entry ret i8* %retval } +; Kill calls to @clang.arc.use(...) +; CHECK: define void @test9( +; CHECK-NOT: clang.arc.use +; CHECK: } +define void @test9(i8* %a, i8* %b) { + call void (...)* @clang.arc.use(i8* %a, i8* %b) nounwind + ret void +} + +declare void @clang.arc.use(...) nounwind + ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll b/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll deleted file mode 100644 index bdee2be94f..0000000000 --- a/test/Transforms/ObjCARC/dont-infinite-loop-during-block-escape-analysis.ll +++ /dev/null @@ -1,87 +0,0 @@ -; RUN: opt -S -objc-arc < %s -; bugzilla://14551 -; rdar://12851911 - -; Make sure that we do not hang clang during escape analysis. - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-darwin" - -%struct.__block_descriptor = type { i64, i64 } -%struct.__block_byref_foo = type { i8*, %struct.__block_byref_foo*, i32, i32, i32 } - -@_NSConcreteGlobalBlock = external global i8* -@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1 -@__block_descriptor_tmp = internal constant { i64, i64, i8*, i8* } { i64 0, i64 32, i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8* null } -@__block_literal_global = internal constant { i8**, i32, i32, i8*, %struct.__block_descriptor* } { i8** @_NSConcreteGlobalBlock, i32 1342177280, i32 0, i8* bitcast (void (i8*)* @__hang_clang_block_invoke to i8*), %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*) }, align 8 - -define void @hang_clang() uwtable optsize ssp { -entry: - %foo = alloca %struct.__block_byref_foo, align 8 - %byref.isa = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 0 - store i8* null, i8** %byref.isa, align 8 - %byref.forwarding = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 1 - store %struct.__block_byref_foo* %foo, %struct.__block_byref_foo** %byref.forwarding, align 8 - %byref.flags = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 2 - store i32 536870912, i32* %byref.flags, align 8 - %byref.size = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 3 - store i32 32, i32* %byref.size, align 4 - %foo1 = getelementptr inbounds %struct.__block_byref_foo* %foo, i64 0, i32 4 - store i32 0, i32* %foo1, align 8, !tbaa !4 - br label %for.body - -for.body: ; preds = %for.inc.for.body_crit_edge, %entry - %0 = phi i1 [ true, %entry ], [ %phitmp, %for.inc.for.body_crit_edge ] - %i.06 = phi i32 [ 1, %entry ], [ %phitmp8, %for.inc.for.body_crit_edge ] - %block.05 = phi void (...)* [ null, %entry ], [ %block.1, %for.inc.for.body_crit_edge ] - br i1 %0, label %for.inc, label %if.then - -if.then: ; preds = %for.body - %1 = call i8* @objc_retainBlock(i8* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) nounwind, !clang.arc.copy_on_escape !7 - %2 = bitcast i8* %1 to void (...)* - %3 = bitcast void (...)* %block.05 to i8* - call void @objc_release(i8* %3) nounwind, !clang.imprecise_release !7 - br label %for.inc - -for.inc: ; preds = %for.body, %if.then - %block.1 = phi void (...)* [ %2, %if.then ], [ %block.05, %for.body ] - %exitcond = icmp eq i32 %i.06, 10 - br i1 %exitcond, label %for.end, label %for.inc.for.body_crit_edge - -for.inc.for.body_crit_edge: ; preds = %for.inc - %.pre = load %struct.__block_byref_foo** %byref.forwarding, align 8 - %foo2.phi.trans.insert = getelementptr inbounds %struct.__block_byref_foo* %.pre, i64 0, i32 4 - %.pre7 = load i32* %foo2.phi.trans.insert, align 4, !tbaa !4 - %phitmp = icmp eq i32 %.pre7, 0 - %phitmp8 = add i32 %i.06, 1 - br label %for.body - -for.end: ; preds = %for.inc - %4 = bitcast %struct.__block_byref_foo* %foo to i8* - call void @_Block_object_dispose(i8* %4, i32 8) - %5 = bitcast void (...)* %block.1 to i8* - call void @objc_release(i8* %5) nounwind, !clang.imprecise_release !7 - ret void -} - -define internal void @__hang_clang_block_invoke(i8* nocapture %.block_descriptor) nounwind uwtable readnone optsize ssp { -entry: - ret void -} - -declare i8* @objc_retainBlock(i8*) - -declare void @objc_release(i8*) nonlazybind - -declare void @_Block_object_dispose(i8*, i32) - -!llvm.module.flags = !{!0, !1, !2, !3} - -!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2} -!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0} -!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"} -!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0} -!4 = metadata !{metadata !"int", metadata !5} -!5 = metadata !{metadata !"omnipotent char", metadata !6} -!6 = metadata !{metadata !"Simple C/C++ TBAA"} -!7 = metadata !{} diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll new file mode 100644 index 0000000000..9c7b81a95d --- /dev/null +++ b/test/Transforms/ObjCARC/intrinsic-use.ll @@ -0,0 +1,63 @@ +; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64" + +declare i8* @objc_retain(i8*) +declare i8* @objc_retainAutorelease(i8*) +declare void @objc_release(i8*) +declare i8* @objc_autorelease(i8*) + +declare void @clang.arc.use(...) + +declare void @test0_helper(i8*, i8**) + +; Ensure that we honor clang.arc.use as a use and don't miscompile +; the reduced test case from <rdar://13195034>. +; +; FIXME: the fact that we re-order retains w.r.t. @clang.arc.use could +; be problematic if we get run twice, e.g. under LTO. +; +; CHECK: define void @test0( +; CHECK: @objc_retain(i8* %x) +; CHECK-NEXT: store i8* %y, i8** %temp0 +; CHECK-NEXT: @objc_retain(i8* %y) +; CHECK-NEXT: call void @test0_helper +; CHECK-NEXT: [[VAL1:%.*]] = load i8** %temp0 +; CHECK-NEXT: call void (...)* @clang.arc.use(i8* %y) +; CHECK-NEXT: @objc_retain(i8* [[VAL1]]) +; CHECK-NEXT: @objc_release(i8* %y) +; CHECK-NEXT: store i8* [[VAL1]], i8** %temp1 +; CHECK-NEXT: call void @test0_helper +; CHECK-NEXT: [[VAL2:%.*]] = load i8** %temp1 +; CHECK-NEXT: call void (...)* @clang.arc.use(i8* [[VAL1]]) +; CHECK-NEXT: @objc_retain(i8* [[VAL2]]) +; CHECK-NEXT: @objc_release(i8* [[VAL1]]) +; CHECK-NEXT: @objc_autorelease(i8* %x) +; CHECK-NEXT: store i8* %x, i8** %out +; CHECK-NEXT: @objc_release(i8* [[VAL2]]) +; CHECK-NEXT: ret void +define void @test0(i8** %out, i8* %x, i8* %y) { +entry: + %temp0 = alloca i8*, align 8 + %temp1 = alloca i8*, align 8 + %0 = call i8* @objc_retain(i8* %x) nounwind + %1 = call i8* @objc_retain(i8* %y) nounwind + store i8* %y, i8** %temp0 + call void @test0_helper(i8* %x, i8** %temp0) + %val1 = load i8** %temp0 + %2 = call i8* @objc_retain(i8* %val1) nounwind + call void (...)* @clang.arc.use(i8* %y) nounwind + call void @objc_release(i8* %y) nounwind + store i8* %val1, i8** %temp1 + call void @test0_helper(i8* %x, i8** %temp1) + %val2 = load i8** %temp1 + %3 = call i8* @objc_retain(i8* %val2) nounwind + call void (...)* @clang.arc.use(i8* %val1) nounwind + call void @objc_release(i8* %val1) nounwind + %4 = call i8* @objc_retain(i8* %x) nounwind + %5 = call i8* @objc_autorelease(i8* %x) nounwind + store i8* %x, i8** %out + call void @objc_release(i8* %val2) nounwind + call void @objc_release(i8* %x) nounwind + ret void +} diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll index 9728f6e0d9..58b5bbe9c7 100644 --- a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll +++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll @@ -59,11 +59,12 @@ lpad: ; preds = %entry resume { i8*, i32 } %t8 } -; There is no !clang.arc.no_objc_arc_exceptions -; metadata here, so the optimizer shouldn't eliminate anything. +; There is no !clang.arc.no_objc_arc_exceptions metadata here, so the optimizer +; shouldn't eliminate anything, but *CAN* strength reduce the objc_retainBlock +; to an objc_retain. ; CHECK: define void @test0_no_metadata( -; CHECK: call i8* @objc_retainBlock( +; CHECK: call i8* @objc_retain( ; CHECK: invoke ; CHECK: call void @objc_release( ; CHECK: } diff --git a/test/Transforms/ObjCARC/retain-block-escape-analysis.ll b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll new file mode 100644 index 0000000000..2c1ddce328 --- /dev/null +++ b/test/Transforms/ObjCARC/retain-block-escape-analysis.ll @@ -0,0 +1,127 @@ +; RUN: opt -S -objc-arc < %s | FileCheck %s + +declare i8* @objc_retain(i8*) nonlazybind +declare void @objc_release(i8*) nonlazybind +declare i8* @objc_retainBlock(i8*) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Use by an instruction which copies the value is an escape if the ; +; result is an escape. The current instructions with this property are: ; +; ; +; 1. BitCast. ; +; 2. GEP. ; +; 3. PhiNode. ; +; 4. SelectInst. ; +; ; +; Make sure that such instructions do not confuse the optimizer into removing ; +; an objc_retainBlock that is needed. ; +; ; +; rdar://13273675. (With extra test cases to handle bitcast, phi, and select. ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define void @bitcasttest(i8* %storage, void (...)* %block) { +; CHECK: define void @bitcasttest +entry: + %t1 = bitcast void (...)* %block to i8* +; CHECK-NOT: tail call i8* @objc_retain + %t2 = tail call i8* @objc_retain(i8* %t1) +; CHECK: tail call i8* @objc_retainBlock + %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 + %t4 = bitcast i8* %storage to void (...)** + %t5 = bitcast i8* %t3 to void (...)* + store void (...)* %t5, void (...)** %t4, align 8 +; CHECK-NOT: call void @objc_release + call void @objc_release(i8* %t1) + ret void +} + +define void @geptest(void (...)** %storage_array, void (...)* %block) { +; CHECK: define void @geptest +entry: + %t1 = bitcast void (...)* %block to i8* +; CHECK-NOT: tail call i8* @objc_retain + %t2 = tail call i8* @objc_retain(i8* %t1) +; CHECK: tail call i8* @objc_retainBlock + %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 + %t4 = bitcast i8* %t3 to void (...)* + + %storage = getelementptr inbounds void (...)** %storage_array, i64 0 + + store void (...)* %t4, void (...)** %storage, align 8 +; CHECK-NOT: call void @objc_release + call void @objc_release(i8* %t1) + ret void +} + +define void @selecttest(void (...)** %store1, void (...)** %store2, + void (...)* %block) { +; CHECK: define void @selecttest +entry: + %t1 = bitcast void (...)* %block to i8* +; CHECK-NOT: tail call i8* @objc_retain + %t2 = tail call i8* @objc_retain(i8* %t1) +; CHECK: tail call i8* @objc_retainBlock + %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 + %t4 = bitcast i8* %t3 to void (...)* + %store = select i1 undef, void (...)** %store1, void (...)** %store2 + store void (...)* %t4, void (...)** %store, align 8 +; CHECK-NOT: call void @objc_release + call void @objc_release(i8* %t1) + ret void +} + +define void @phinodetest(void (...)** %storage1, + void (...)** %storage2, + void (...)* %block) { +; CHECK: define void @phinodetest +entry: + %t1 = bitcast void (...)* %block to i8* +; CHECK-NOT: tail call i8* @objc_retain + %t2 = tail call i8* @objc_retain(i8* %t1) +; CHECK: tail call i8* @objc_retainBlock + %t3 = tail call i8* @objc_retainBlock(i8* %t1), !clang.arc.copy_on_escape !0 + %t4 = bitcast i8* %t3 to void (...)* + br i1 undef, label %store1_set, label %store2_set + +store1_set: + br label %end + +store2_set: + br label %end + +end: + %storage = phi void (...)** [ %storage1, %store1_set ], [ %storage2, %store2_set] + store void (...)* %t4, void (...)** %storage, align 8 +; CHECK-NOT: call void @objc_release + call void @objc_release(i8* %t1) + ret void +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; This test makes sure that we do not hang clang when visiting a use ; +; cycle caused by phi nodes during objc-arc analysis. *NOTE* This ; +; test case looks a little convoluted since it was produced by ; +; bugpoint. ; +; ; +; bugzilla://14551 ; +; rdar://12851911 ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +define void @phinode_use_cycle(i8* %block) uwtable optsize ssp { +; CHECK: define void @phinode_use_cycle(i8* %block) +entry: + br label %for.body + +for.body: ; preds = %if.then, %for.body, %entry + %block.05 = phi void (...)* [ null, %entry ], [ %1, %if.then ], [ %block.05, %for.body ] + br i1 undef, label %for.body, label %if.then + +if.then: ; preds = %for.body + %0 = call i8* @objc_retainBlock(i8* %block), !clang.arc.copy_on_escape !0 + %1 = bitcast i8* %0 to void (...)* + %2 = bitcast void (...)* %block.05 to i8* + call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0 + br label %for.body +} + +!0 = metadata !{} diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll index ee57049d8a..1bb3f0276a 100644 --- a/test/Transforms/ObjCARC/retain-block.ll +++ b/test/Transforms/ObjCARC/retain-block.ll @@ -98,7 +98,7 @@ entry: ; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) [[NUW]] ; CHECK-NEXT: @use_pointer(i8* %tmp2) ; CHECK-NEXT: @use_pointer(i8* %tmp2) -; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0 +; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 ; CHECK-NOT: @objc ; CHECK: } define void @test1_no_metadata(i8* %tmp) { @@ -122,7 +122,7 @@ entry: ; CHECK-NEXT: store i8* %tmp2, i8** %z ; CHECK-NEXT: @use_pointer(i8* %tmp2) ; CHECK-NEXT: @use_pointer(i8* %tmp2) -; CHECK-NEXT: tail call void @objc_release(i8* %tmp) [[NUW]], !clang.imprecise_release !0 +; CHECK-NEXT: tail call void @objc_release(i8* %tmp2) [[NUW]], !clang.imprecise_release !0 ; CHECK-NOT: @objc ; CHECK: } define void @test1_escape(i8* %tmp, i8** %z) { diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll index e834179bb7..165829f7c0 100644 --- a/test/Transforms/ObjCARC/retain-not-declared.ll +++ b/test/Transforms/ObjCARC/retain-not-declared.ll @@ -21,8 +21,8 @@ define i8* @test0(i8* %p) { entry: %call = tail call i8* @objc_unretainedObject(i8* %p) %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind - %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind - ret i8* %1 + %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %call) nounwind + ret i8* %call } ; Properly create the @objc_retain declaration when it doesn't already exist. diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll new file mode 100644 index 0000000000..d371a9b5b6 --- /dev/null +++ b/test/Transforms/Reassociate/xor_reassoc.ll @@ -0,0 +1,166 @@ +;RUN: opt -S -reassociate < %s | FileCheck %s + +; ========================================================================== +; +; Xor reassociation general cases +; +; ========================================================================== + +; (x | c1) ^ (x | c2) => (x & c3) ^ c3, where c3 = c1^c2 +; +define i32 @xor1(i32 %x) { + %or = or i32 %x, 123 + %or1 = or i32 %x, 456 + %xor = xor i32 %or, %or1 + ret i32 %xor + +;CHECK: @xor1 +;CHECK: %and.ra = and i32 %x, 435 +;CHECK: %xor = xor i32 %and.ra, 435 +} + +; Test rule : (x & c1) ^ (x & c2) = (x & (c1^c2)) +; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y +define i32 @xor2(i32 %x, i32 %y) { + %and = and i32 %x, 123 + %xor = xor i32 %and, %y + %and1 = and i32 %x, 456 + %xor2 = xor i32 %xor, %and1 + ret i32 %xor2 + +;CHECK: @xor2 +;CHECK: %and.ra = and i32 %x, 435 +;CHECK: %xor2 = xor i32 %and.ra, %y +} + +; Test rule: (x | c1) ^ (x & c2) = (x & c3) ^ c1, where c3 = ~c1 ^ c2 +; c3 = ~c1 ^ c2 +define i32 @xor3(i32 %x, i32 %y) { + %or = or i32 %x, 123 + %xor = xor i32 %or, %y + %and = and i32 %x, 456 + %xor1 = xor i32 %xor, %and + ret i32 %xor1 + +;CHECK: @xor3 +;CHECK: %and.ra = and i32 %x, -436 +;CHECK: %xor = xor i32 %y, 123 +;CHECK: %xor1 = xor i32 %xor, %and.ra +} + +; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2) +define i32 @xor4(i32 %x, i32 %y) { + %and = and i32 %x, -124 + %xor = xor i32 %y, 435 + %xor1 = xor i32 %xor, %and + ret i32 %xor1 +; CHECK: @xor4 +; CHECK: %and = and i32 %x, -124 +; CHECK: %xor = xor i32 %y, 435 +; CHECK: %xor1 = xor i32 %xor, %and +} + +; ========================================================================== +; +; Xor reassociation special cases +; +; ========================================================================== + +; Special case1: +; (x | c1) ^ (x & ~c1) = c1 +define i32 @xor_special1(i32 %x, i32 %y) { + %or = or i32 %x, 123 + %xor = xor i32 %or, %y + %and = and i32 %x, -124 + %xor1 = xor i32 %xor, %and + ret i32 %xor1 +; CHECK: @xor_special1 +; CHECK: %xor1 = xor i32 %y, 123 +; CHECK: ret i32 %xor1 +} + +; Special case1: +; (x | c1) ^ (x & c1) = x ^ c1 +define i32 @xor_special2(i32 %x, i32 %y) { + %or = or i32 %x, 123 + %xor = xor i32 %or, %y + %and = and i32 %x, 123 + %xor1 = xor i32 %xor, %and + ret i32 %xor1 +; CHECK: @xor_special2 +; CHECK: %xor = xor i32 %y, 123 +; CHECK: %xor1 = xor i32 %xor, %x +; CHECK: ret i32 %xor1 +} + +; (x | c1) ^ (x | c1) => 0 +define i32 @xor_special3(i32 %x) { + %or = or i32 %x, 123 + %or1 = or i32 %x, 123 + %xor = xor i32 %or, %or1 + ret i32 %xor +;CHECK: @xor_special3 +;CHECK: ret i32 0 +} + +; (x & c1) ^ (x & c1) => 0 +define i32 @xor_special4(i32 %x) { + %or = and i32 %x, 123 + %or1 = and i32 123, %x + %xor = xor i32 %or, %or1 + ret i32 %xor +;CHECK: @xor_special4 +;CHECK: ret i32 0 +} + +; ========================================================================== +; +; Xor reassociation curtail code size +; +; ========================================================================== + +; (x | c1) ^ (x | c2) => (x & c3) ^ c3 +; is enabled if one of operands has multiple uses +; +define i32 @xor_ra_size1(i32 %x) { + %or = or i32 %x, 123 + %or1 = or i32 %x, 456 + %xor = xor i32 %or, %or1 + + %add = add i32 %xor, %or + ret i32 %add +;CHECK: @xor_ra_size1 +;CHECK: %xor = xor i32 %and.ra, 435 +} + +; (x | c1) ^ (x | c2) => (x & c3) ^ c3 +; is disenabled if bothf operands has multiple uses. +; +define i32 @xor_ra_size2(i32 %x) { + %or = or i32 %x, 123 + %or1 = or i32 %x, 456 + %xor = xor i32 %or, %or1 + + %add = add i32 %xor, %or + %add2 = add i32 %add, %or1 + ret i32 %add2 + +;CHECK: @xor_ra_size2 +;CHECK: %or1 = or i32 %x, 456 +;CHECK: %xor = xor i32 %or, %or1 +} + + +; ========================================================================== +; +; Xor reassociation bugs +; +; ========================================================================== + +@xor_bug1_data = external global <{}>, align 4 +define void @xor_bug1() { + %1 = ptrtoint i32* undef to i64 + %2 = xor i64 %1, ptrtoint (<{}>* @xor_bug1_data to i64) + %3 = and i64 undef, %2 + ret void +} diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index efc01acd59..30dd217743 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -500,14 +500,27 @@ entry: define i64 @test9() { ; Ensure we can handle loads off the end of an alloca even when wrapped in -; weird bit casts and types. The result is undef, but this shouldn't crash -; anything. +; weird bit casts and types. This is valid IR due to the alignment and masking +; off the bits past the end of the alloca. +; ; CHECK: @test9 ; CHECK-NOT: alloca -; CHECK: ret i64 undef +; CHECK: %[[b2:.*]] = zext i8 26 to i64 +; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16 +; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681 +; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]] +; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64 +; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8 +; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281 +; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]] +; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64 +; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256 +; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]] +; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215 +; CHECK-NEXT: ret i64 %[[result]] entry: - %a = alloca { [3 x i8] } + %a = alloca { [3 x i8] }, align 8 %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0 store i8 0, i8* %gep1, align 1 %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1 @@ -516,7 +529,8 @@ entry: store i8 26, i8* %gep3, align 1 %cast = bitcast { [3 x i8] }* %a to { i64 }* %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0 - %result = load i64* %elt + %load = load i64* %elt + %result = and i64 %load, 16777215 ret i64 %result } @@ -617,11 +631,12 @@ define i32 @test13() { ; Ensure we don't crash and handle undefined loads that straddle the end of the ; allocation. ; CHECK: @test13 -; CHECK: %[[ret:.*]] = zext i16 undef to i32 -; CHECK: ret i32 %[[ret]] +; CHECK: %[[value:.*]] = zext i8 0 to i16 +; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32 +; CHECK-NEXT: ret i32 %[[ret]] entry: - %a = alloca [3 x i8] + %a = alloca [3 x i8], align 2 %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0 store i8 0, i8* %b0ptr %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1 @@ -1160,20 +1175,25 @@ define void @PR14548(i1 %x) { entry: %a = alloca <{ i1 }>, align 8 %b = alloca <{ i1 }>, align 8 -; Nothing of interest is simplified here. -; CHECK: alloca -; CHECK: alloca +; CHECK: %[[a:.*]] = alloca i8, align 8 %b.i1 = bitcast <{ i1 }>* %b to i1* store i1 %x, i1* %b.i1, align 8 %b.i8 = bitcast <{ i1 }>* %b to i8* %foo = load i8* %b.i8, align 1 +; CHECK-NEXT: {{.*}} = zext i1 %x to i8 +; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8 +; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8 +; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8 %a.i8 = bitcast <{ i1 }>* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind %bar = load i8* %a.i8, align 1 %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0 %baz = load i1* %a.i1, align 1 +; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1* +; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8 + ret void } diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll index 921016a9c2..b9931800e7 100644 --- a/test/Transforms/SROA/phi-and-select.ll +++ b/test/Transforms/SROA/phi-and-select.ll @@ -396,9 +396,10 @@ define i64 @PR14132(i1 %flag) { ; Here we form a PHI-node by promoting the pointer alloca first, and then in ; order to promote the other two allocas, we speculate the load of the ; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8 -; alloca, which is completely bogus. However, we were asserting on trying to -; rewrite it. Now it is replaced with undef. Eventually we may replace it with -; unrechable and even the CFG will go away here. +; alloca. While this is a bit dubious, we were asserting on trying to +; rewrite it. The trick is that the code using the value may carefully take +; steps to only use the not-undef bits, and so we need to at least loosely +; support this.. entry: %a = alloca i64 %b = alloca i8 @@ -414,13 +415,14 @@ entry: if.then: store i8* %b, i8** %ptr.cast br label %if.end +; CHECK-NOT: store +; CHECK: %[[ext:.*]] = zext i8 1 to i64 if.end: %tmp = load i64** %ptr %result = load i64* %tmp -; CHECK-NOT: store ; CHECK-NOT: load -; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ] +; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ] ret i64 %result ; CHECK-NEXT: ret i64 %[[result]] diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll index 8a59c2ec2a..7d3bcea8b8 100644 --- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll +++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll @@ -40,11 +40,10 @@ entry: declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !llvm.dbg.cu = !{!0} -!17 = metadata !{metadata !1} -!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !"clang version 3.0 (trunk 131941)", i1 true, i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ] -!1 = metadata !{i32 786478, i32 0, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] -!2 = metadata !{i32 786473, metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b", metadata !0} ; [ DW_TAG_file_type ] +!0 = metadata !{i32 786449, i32 0, i32 12, metadata !2, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, null, null, metadata !17, null, null} ; [ DW_TAG_compile_unit ] +!1 = metadata !{i32 786478, metadata !2, metadata !"f", metadata !"f", metadata !"", metadata !2, i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] +!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ] !3 = metadata !{i32 786453, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] !4 = metadata !{metadata !5} !5 = metadata !{i32 786468, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] @@ -59,3 +58,5 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone !14 = metadata !{i32 3, i32 5, metadata !11, null} !15 = metadata !{i32 4, i32 5, metadata !11, null} !16 = metadata !{i32 5, i32 5, metadata !11, null} +!17 = metadata !{metadata !1} +!18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"} diff --git a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll b/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll deleted file mode 100644 index ac89199b0e..0000000000 --- a/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll +++ /dev/null @@ -1,13 +0,0 @@ -; RUN: opt < %s -inline -simplify-libcalls -functionattrs | \ -; RUN: llvm-dis | grep nocapture | count 2 -; Check that nocapture attributes are added when run after an SCC pass. -; PR3520 - -define i32 @use(i8* %x) nounwind readonly { -entry: - %0 = tail call i64 @strlen(i8* %x) nounwind readonly ; <i64> [#uses=1] - %1 = trunc i64 %0 to i32 ; <i32> [#uses=1] - ret i32 %1 -} - -declare i64 @strlen(i8*) nounwind readonly diff --git a/test/Transforms/TailCallElim/Stats/lit.local.cfg b/test/Transforms/TailCallElim/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/TailCallElim/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/Transforms/TailCallElim/Stats/ackermann.ll b/test/Transforms/TailCallElim/ackermann.ll index 5b5dbcc225..83d98b84ea 100644 --- a/test/Transforms/TailCallElim/Stats/ackermann.ll +++ b/test/Transforms/TailCallElim/ackermann.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; This function contains two tail calls, which should be eliminated ; RUN: opt < %s -tailcallelim -stats -disable-output 2>&1 | grep "2 tailcallelim" diff --git a/test/Transforms/TailCallElim/Stats/dup_tail.ll b/test/Transforms/TailCallElim/dup_tail.ll index 3b87ed3ca6..f5b87f2764 100644 --- a/test/Transforms/TailCallElim/Stats/dup_tail.ll +++ b/test/Transforms/TailCallElim/dup_tail.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; Duplicate the return into if.end to enable TCE. ; RUN: opt -tailcallelim -stats -disable-output < %s 2>&1 | FileCheck %s diff --git a/test/Transforms/TailDup/Stats/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll index 7853d7ba06..292186020f 100644 --- a/test/Transforms/TailDup/Stats/2008-06-11-AvoidDupLoopHeader.ll +++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll @@ -1,3 +1,4 @@ +; REQUIRES: asserts ; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output 2>&1 | not grep tailduplicate ; XFAIL: * diff --git a/test/Transforms/TailDup/Stats/lit.local.cfg b/test/Transforms/TailDup/Stats/lit.local.cfg deleted file mode 100644 index 89c0cd9d48..0000000000 --- a/test/Transforms/TailDup/Stats/lit.local.cfg +++ /dev/null @@ -1,4 +0,0 @@ -config.suffixes = ['.ll', '.c', '.cpp'] - -if not config.root.enable_assertions: - config.unsupported = True diff --git a/test/lit.cfg b/test/lit.cfg index 128bbe927a..216e3c1aa0 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -252,6 +252,13 @@ if loadable_module: if config.lto_is_enabled == "1" and platform.system() == "Darwin": config.available_features.add('lto_on_osx') +# Sanitizers. +if config.llvm_use_sanitizer == "Address": + config.available_features.add("asan") +if (config.llvm_use_sanitizer == "Memory" or + config.llvm_use_sanitizer == "MemoryWithOrigins"): + config.available_features.add("msan") + # llc knows whether he is compiled with -DNDEBUG. import subprocess try: diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in index bfd901a079..8024b24fcd 100644 --- a/test/lit.site.cfg.in +++ b/test/lit.site.cfg.in @@ -18,6 +18,7 @@ config.llvm_bindings = "@LLVM_BINDINGS@" config.host_os = "@HOST_OS@" config.host_arch = "@HOST_ARCH@" config.llvm_use_intel_jitevents = "@LLVM_USE_INTEL_JITEVENTS@" +config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" # Support substitution of the tools_dir with user parameters. This is # used when we can't determine the tool dir at configuration time. diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 4cf5aa5105..5e9560491e 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -49,7 +49,8 @@ endif() if( LLVM_ENABLE_PIC ) # TODO: support other systems: - if( CMAKE_SYSTEM_NAME STREQUAL "Linux" ) + if( (CMAKE_SYSTEM_NAME STREQUAL "Linux") + OR (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD") ) add_subdirectory(gold) endif() endif() diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp index cede4ac3ae..e49a96b1e0 100644 --- a/tools/bugpoint/BugDriver.cpp +++ b/tools/bugpoint/BugDriver.cpp @@ -16,12 +16,12 @@ #include "BugDriver.h" #include "ToolRunner.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/Linker.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Host.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include <memory> diff --git a/tools/bugpoint/CMakeLists.txt b/tools/bugpoint/CMakeLists.txt index 3c5e64fdab..e990cfcbba 100644 --- a/tools/bugpoint/CMakeLists.txt +++ b/tools/bugpoint/CMakeLists.txt @@ -1,5 +1,5 @@ set(LLVM_LINK_COMPONENTS asmparser instrumentation scalaropts ipo - linker bitreader bitwriter vectorize objcarcopts) + linker bitreader bitwriter irreader vectorize objcarcopts) add_llvm_tool(bugpoint BugDriver.cpp diff --git a/tools/bugpoint/LLVMBuild.txt b/tools/bugpoint/LLVMBuild.txt index e03c594bf9..01643553c5 100644 --- a/tools/bugpoint/LLVMBuild.txt +++ b/tools/bugpoint/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = bugpoint parent = Tools -required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Linker Scalar ObjCARC +required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Linker Scalar ObjCARC diff --git a/tools/bugpoint/Makefile b/tools/bugpoint/Makefile index 65ffc13022..20493218b0 100644 --- a/tools/bugpoint/Makefile +++ b/tools/bugpoint/Makefile @@ -10,6 +10,6 @@ LEVEL := ../.. TOOLNAME := bugpoint LINK_COMPONENTS := asmparser instrumentation scalaropts ipo linker bitreader \ - bitwriter vectorize objcarcopts + bitwriter irreader vectorize objcarcopts include $(LEVEL)/Makefile.common diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt index 683f29862d..e5a5550e9e 100644 --- a/tools/llc/CMakeLists.txt +++ b/tools/llc/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser) +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser irreader) add_llvm_tool(llc llc.cpp diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt index 8c8794f620..45cdc6498f 100644 --- a/tools/llc/LLVMBuild.txt +++ b/tools/llc/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llc parent = Tools -required_libraries = AsmParser BitReader all-targets +required_libraries = AsmParser BitReader IRReader all-targets diff --git a/tools/llc/Makefile b/tools/llc/Makefile index b32d5575d5..c24f378bc5 100644 --- a/tools/llc/Makefile +++ b/tools/llc/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llc -LINK_COMPONENTS := all-targets bitreader asmparser +LINK_COMPONENTS := all-targets bitreader asmparser irreader include $(LEVEL)/Makefile.common diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index aa65223473..1dce9d7b60 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LinkAllCodegenComponents.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" @@ -28,11 +29,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Host.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt index 356233f397..aaa6598e71 100644 --- a/tools/lli/CMakeLists.txt +++ b/tools/lli/CMakeLists.txt @@ -1,5 +1,5 @@ -set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native) +set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag native) if( LLVM_USE_OPROFILE ) set(LLVM_LINK_COMPONENTS diff --git a/tools/lli/LLVMBuild.txt b/tools/lli/LLVMBuild.txt index 36ceb39b12..5823792ff0 100644 --- a/tools/lli/LLVMBuild.txt +++ b/tools/lli/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = lli parent = Tools -required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native +required_libraries = AsmParser BitReader IRReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG Native diff --git a/tools/lli/Makefile b/tools/lli/Makefile index 85ac6b46bb..a6530584a2 100644 --- a/tools/lli/Makefile +++ b/tools/lli/Makefile @@ -12,7 +12,7 @@ TOOLNAME := lli include $(LEVEL)/Makefile.config -LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag native +LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag native # If Intel JIT Events support is confiured, link against the LLVM Intel JIT # Events interface library diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp index 332660fc1e..297763fcfb 100644 --- a/tools/lli/lli.cpp +++ b/tools/lli/lli.cpp @@ -29,11 +29,11 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Format.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Memory.h" @@ -42,6 +42,7 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" #include <cerrno> diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt index c59d69ea0d..0df8b9ed79 100644 --- a/tools/llvm-diff/CMakeLists.txt +++ b/tools/llvm-diff/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS support asmparser bitreader) +set(LLVM_LINK_COMPONENTS support asmparser bitreader irreader) add_llvm_tool(llvm-diff llvm-diff.cpp diff --git a/tools/llvm-diff/LLVMBuild.txt b/tools/llvm-diff/LLVMBuild.txt index fa06a03353..5adfdc2bd6 100644 --- a/tools/llvm-diff/LLVMBuild.txt +++ b/tools/llvm-diff/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-diff parent = Tools -required_libraries = AsmParser BitReader +required_libraries = AsmParser BitReader IRReader diff --git a/tools/llvm-diff/Makefile b/tools/llvm-diff/Makefile index f7fa7159c5..bd97a6a9f5 100644 --- a/tools/llvm-diff/Makefile +++ b/tools/llvm-diff/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-diff -LINK_COMPONENTS := asmparser bitreader +LINK_COMPONENTS := asmparser bitreader irreader # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp index 0b9e92b1b8..6eca1e2bfc 100644 --- a/tools/llvm-diff/llvm-diff.cpp +++ b/tools/llvm-diff/llvm-diff.cpp @@ -19,8 +19,8 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" diff --git a/tools/llvm-extract/CMakeLists.txt b/tools/llvm-extract/CMakeLists.txt index a4e3266e35..3163c4bbbd 100644 --- a/tools/llvm-extract/CMakeLists.txt +++ b/tools/llvm-extract/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter) +set(LLVM_LINK_COMPONENTS asmparser ipo bitreader bitwriter irreader) add_llvm_tool(llvm-extract llvm-extract.cpp diff --git a/tools/llvm-extract/LLVMBuild.txt b/tools/llvm-extract/LLVMBuild.txt index 1b1a4c36cd..70e3507a73 100644 --- a/tools/llvm-extract/LLVMBuild.txt +++ b/tools/llvm-extract/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-extract parent = Tools -required_libraries = AsmParser BitReader BitWriter IPO +required_libraries = AsmParser BitReader BitWriter IRReader IPO diff --git a/tools/llvm-extract/Makefile b/tools/llvm-extract/Makefile index a1e93f5ce4..d371c54759 100644 --- a/tools/llvm-extract/Makefile +++ b/tools/llvm-extract/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-extract -LINK_COMPONENTS := ipo bitreader bitwriter asmparser +LINK_COMPONENTS := ipo bitreader bitwriter asmparser irreader # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index 85a921118a..fd0a381807 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -19,13 +19,14 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Regex.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Transforms/IPO.h" diff --git a/tools/llvm-jitlistener/CMakeLists.txt b/tools/llvm-jitlistener/CMakeLists.txt index d429af928f..c9704fb224 100644 --- a/tools/llvm-jitlistener/CMakeLists.txt +++ b/tools/llvm-jitlistener/CMakeLists.txt @@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS debuginfo
inteljitevents
interpreter
+ irreader
jit
mcjit
nativecodegen
diff --git a/tools/llvm-jitlistener/LLVMBuild.txt b/tools/llvm-jitlistener/LLVMBuild.txt index c436dd90f9..1ce78acecb 100644 --- a/tools/llvm-jitlistener/LLVMBuild.txt +++ b/tools/llvm-jitlistener/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-jitlistener parent = Tools -required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen Object SelectionDAG Native +required_libraries = AsmParser BitReader IRReader Interpreter JIT MCJIT NativeCodeGen Object SelectionDAG Native diff --git a/tools/llvm-jitlistener/Makefile b/tools/llvm-jitlistener/Makefile index 30182355c9..b132227317 100644 --- a/tools/llvm-jitlistener/Makefile +++ b/tools/llvm-jitlistener/Makefile @@ -12,7 +12,7 @@ TOOLNAME := llvm-jitlistener include $(LEVEL)/Makefile.config
-LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag Object
+LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser irreader selectiondag Object
# If Intel JIT Events support is configured, link against the LLVM Intel JIT
# Events interface library. If not, this tool will do nothing useful, but it
diff --git a/tools/llvm-jitlistener/llvm-jitlistener.cpp b/tools/llvm-jitlistener/llvm-jitlistener.cpp index d6f5032d6e..dbaf075e91 100644 --- a/tools/llvm-jitlistener/llvm-jitlistener.cpp +++ b/tools/llvm-jitlistener/llvm-jitlistener.cpp @@ -22,9 +22,9 @@ #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" diff --git a/tools/llvm-link/CMakeLists.txt b/tools/llvm-link/CMakeLists.txt index 11933f7f95..4df53564e1 100644 --- a/tools/llvm-link/CMakeLists.txt +++ b/tools/llvm-link/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser) +set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser irreader) add_llvm_tool(llvm-link llvm-link.cpp diff --git a/tools/llvm-link/LLVMBuild.txt b/tools/llvm-link/LLVMBuild.txt index 6399dede78..2e386f3c23 100644 --- a/tools/llvm-link/LLVMBuild.txt +++ b/tools/llvm-link/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-link parent = Tools -required_libraries = AsmParser BitReader BitWriter Linker +required_libraries = AsmParser BitReader BitWriter IRReader Linker diff --git a/tools/llvm-link/Makefile b/tools/llvm-link/Makefile index 2553db0cd3..ed30d2d256 100644 --- a/tools/llvm-link/Makefile +++ b/tools/llvm-link/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-link -LINK_COMPONENTS := linker bitreader bitwriter asmparser +LINK_COMPONENTS := linker bitreader bitwriter asmparser irreader # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp index f6c9f11a5e..83665cc175 100644 --- a/tools/llvm-link/llvm-link.cpp +++ b/tools/llvm-link/llvm-link.cpp @@ -17,12 +17,13 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/ToolOutputFile.h" #include <memory> @@ -111,9 +112,6 @@ int main(int argc, char **argv) { } } - // TODO: Iterate over the -l list and link in any modules containing - // global symbols that have not been resolved so far. - if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite; std::string ErrorInfo; diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp index 8f0917fc91..ea37d105dc 100644 --- a/tools/llvm-readobj/llvm-readobj.cpp +++ b/tools/llvm-readobj/llvm-readobj.cpp @@ -39,13 +39,13 @@ static cl::opt<std::string> InputFilename(cl::Positional, cl::desc("<input object>"), cl::init("")); static void dumpSymbolHeader() { - outs() << format(" %-32s", (const char*)"Name") - << format(" %-4s", (const char*)"Type") - << format(" %-16s", (const char*)"Address") - << format(" %-16s", (const char*)"Size") - << format(" %-16s", (const char*)"FileOffset") - << format(" %-26s", (const char*)"Flags") - << "\n"; + outs() << format(" %-32s", (const char *)"Name") + << format(" %-4s", (const char *)"Type") + << format(" %-4s", (const char *)"Section") + << format(" %-16s", (const char *)"Address") + << format(" %-16s", (const char *)"Size") + << format(" %-16s", (const char *)"FileOffset") + << format(" %-26s", (const char *)"Flags") << "\n"; } static void dumpSectionHeader() { @@ -145,6 +145,14 @@ dumpSymbol(const SymbolRef &Sym, const ObjectFile *obj, bool IsDynamic) { checkError(Sym.getFlags(Flags), "SymbolRef.getFlags() failed"); std::string FullName = Name; + llvm::object::section_iterator symSection(obj->begin_sections()); + Sym.getSection(symSection); + StringRef sectionName; + + if (symSection != obj->end_sections()) + checkError(symSection->getName(sectionName), + "SectionRef::getName() failed"); + // If this is a dynamic symbol from an ELF object, append // the symbol's version to the name. if (IsDynamic && obj->isELF()) { @@ -160,11 +168,10 @@ dumpSymbol(const SymbolRef &Sym, const ObjectFile *obj, bool IsDynamic) { // format() can't handle StringRefs outs() << format(" %-32s", FullName.c_str()) << format(" %-4s", getTypeStr(Type)) - << format(" %16" PRIx64, Address) - << format(" %16" PRIx64, Size) - << format(" %16" PRIx64, FileOffset) - << " " << getSymbolFlagStr(Flags) - << "\n"; + << format(" %-32s", std::string(sectionName).c_str()) + << format(" %16" PRIx64, Address) << format(" %16" PRIx64, Size) + << format(" %16" PRIx64, FileOffset) << " " + << getSymbolFlagStr(Flags) << "\n"; } static void dumpStaticSymbol(const SymbolRef &Sym, const ObjectFile *obj) { diff --git a/tools/llvm-stress/Makefile b/tools/llvm-stress/Makefile index 90d57c3fa9..8767cbe417 100644 --- a/tools/llvm-stress/Makefile +++ b/tools/llvm-stress/Makefile @@ -10,7 +10,7 @@ LEVEL := ../.. TOOLNAME := llvm-stress LINK_COMPONENTS := object -LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo +LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts ipo # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS = 1 diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp index 86ea34bff6..29d91a0e92 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.cpp +++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LLVMSymbolize.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Object/MachO.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Path.h" @@ -186,6 +187,10 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, return ss.str(); } +void LLVMSymbolizer::flush() { + DeleteContainerSeconds(Modules); +} + // Returns true if the object endianness is known. static bool getObjectEndianness(const ObjectFile *Obj, bool &IsLittleEndian) { // FIXME: Implement this when libLLVMObject allows to do it easily. diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h index e6220aa4ce..0733dfbbc5 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.h +++ b/tools/llvm-symbolizer/LLVMSymbolize.h @@ -50,6 +50,7 @@ public: symbolizeCode(const std::string &ModuleName, uint64_t ModuleOffset); std::string symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); + void flush(); private: ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName); std::string printDILineInfo(DILineInfo LineInfo) const; diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 75705154e4..e7c83f94f5 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -44,6 +44,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/ObjCARC.h" using namespace llvm; static cl::opt<bool> @@ -390,14 +391,18 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, // Make sure everything is still good. passes.add(createVerifierPass()); - FunctionPassManager *codeGenPasses = new FunctionPassManager(mergedModule); + PassManager codeGenPasses; - codeGenPasses->add(new DataLayout(*_target->getDataLayout())); - _target->addAnalysisPasses(*codeGenPasses); + codeGenPasses.add(new DataLayout(*_target->getDataLayout())); + _target->addAnalysisPasses(codeGenPasses); formatted_raw_ostream Out(out); - if (_target->addPassesToEmitFile(*codeGenPasses, Out, + // If the bitcode files contain ARC code and were compiled with optimization, + // the ObjCARCContractPass must be run, so do it unconditionally here. + codeGenPasses.add(createObjCARCContractPass()); + + if (_target->addPassesToEmitFile(codeGenPasses, Out, TargetMachine::CGFT_ObjectFile)) { errMsg = "target file type not supported"; return true; @@ -407,15 +412,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, passes.run(*mergedModule); // Run the code generator, and write assembly file - codeGenPasses->doInitialization(); - - for (Module::iterator - it = mergedModule->begin(), e = mergedModule->end(); it != e; ++it) - if (!it->isDeclaration()) - codeGenPasses->run(*it); - - codeGenPasses->doFinalization(); - delete codeGenPasses; + codeGenPasses.run(*mergedModule); return false; // success } diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt index cf5e5a83cf..1ff8efbed0 100644 --- a/tools/opt/CMakeLists.txt +++ b/tools/opt/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter instrumentation scalaropts objcarcopts ipo vectorize) +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter irreader instrumentation scalaropts objcarcopts ipo vectorize) add_llvm_tool(opt AnalysisWrappers.cpp diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt index a866d12a26..77b94469ed 100644 --- a/tools/opt/LLVMBuild.txt +++ b/tools/opt/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = opt parent = Tools -required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar ObjCARC all-targets +required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Scalar ObjCARC all-targets diff --git a/tools/opt/Makefile b/tools/opt/Makefile index 79ed815dce..a451005574 100644 --- a/tools/opt/Makefile +++ b/tools/opt/Makefile @@ -9,6 +9,6 @@ LEVEL := ../.. TOOLNAME := opt -LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts objcarcopts ipo vectorize all-targets +LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts objcarcopts ipo vectorize all-targets include $(LEVEL)/Makefile.common diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index 81a2de2d55..ba82bded2b 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -26,17 +26,18 @@ #include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" #include "llvm/LinkAllIR.h" #include "llvm/LinkAllPasses.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PassNameParser.h" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" diff --git a/unittests/ADT/SmallPtrSetTest.cpp b/unittests/ADT/SmallPtrSetTest.cpp index 9114875e00..f85d7c941e 100644 --- a/unittests/ADT/SmallPtrSetTest.cpp +++ b/unittests/ADT/SmallPtrSetTest.cpp @@ -17,6 +17,61 @@ using namespace llvm; // SmallPtrSet swapping test. +TEST(SmallPtrSetTest, GrowthTest) { + int i; + int buf[8]; + for(i=0; i<8; ++i) buf[i]=0; + + + SmallPtrSet<int *, 4> s; + typedef SmallPtrSet<int *, 4>::iterator iter; + + s.insert(&buf[0]); + s.insert(&buf[1]); + s.insert(&buf[2]); + s.insert(&buf[3]); + EXPECT_EQ(4U, s.size()); + + i = 0; + for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i) + (**I)++; + EXPECT_EQ(4, i); + for(i=0; i<8; ++i) + EXPECT_EQ(i<4?1:0,buf[i]); + + s.insert(&buf[4]); + s.insert(&buf[5]); + s.insert(&buf[6]); + s.insert(&buf[7]); + + i = 0; + for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i) + (**I)++; + EXPECT_EQ(8, i); + s.erase(&buf[4]); + s.erase(&buf[5]); + s.erase(&buf[6]); + s.erase(&buf[7]); + EXPECT_EQ(4U, s.size()); + + i = 0; + for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i) + (**I)++; + EXPECT_EQ(4, i); + for(i=0; i<8; ++i) + EXPECT_EQ(i<4?3:1,buf[i]); + + s.clear(); + for(i=0; i<8; ++i) buf[i]=0; + for(i=0; i<128; ++i) s.insert(&buf[i%8]); // test repeated entires + EXPECT_EQ(8U, s.size()); + for(iter I=s.begin(), E=s.end(); I!=E; ++I, ++i) + (**I)++; + for(i=0; i<8; ++i) + EXPECT_EQ(1,buf[i]); +} + + TEST(SmallPtrSetTest, SwapTest) { int buf[10]; diff --git a/unittests/ExecutionEngine/CMakeLists.txt b/unittests/ExecutionEngine/CMakeLists.txt index ed7f10a23c..4eefc1e3bb 100644 --- a/unittests/ExecutionEngine/CMakeLists.txt +++ b/unittests/ExecutionEngine/CMakeLists.txt @@ -6,5 +6,9 @@ add_llvm_unittest(ExecutionEngineTests ExecutionEngineTest.cpp ) -add_subdirectory(JIT) -add_subdirectory(MCJIT) +# Include JIT/MCJIT tests only if native arch is a JIT target. +list(FIND LLVM_TARGETS_WITH_JIT "${LLVM_NATIVE_ARCH}" have_jit) +if (NOT have_jit EQUAL -1 ) + add_subdirectory(JIT) + add_subdirectory(MCJIT) +endif() diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile index ca1195631a..c779a6a47c 100644 --- a/unittests/ExecutionEngine/Makefile +++ b/unittests/ExecutionEngine/Makefile @@ -10,7 +10,10 @@ LEVEL = ../.. TESTNAME = ExecutionEngine LINK_COMPONENTS :=interpreter -PARALLEL_DIRS = JIT MCJIT + +ifeq ($(TARGET_HAS_JIT),1) + PARALLEL_DIRS = JIT MCJIT +endif include $(LEVEL)/Makefile.config include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp index a4137619f4..8ddad38ecf 100644 --- a/unittests/Support/ManagedStatic.cpp +++ b/unittests/Support/ManagedStatic.cpp @@ -19,7 +19,7 @@ using namespace llvm; namespace { -#ifdef HAVE_PTHREAD_H +#if defined(HAVE_PTHREAD_H) && !__has_feature(memory_sanitizer) namespace test1 { llvm::ManagedStatic<int> ms; void *helper(void*) { diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp index 878c22796a..4511259797 100644 --- a/unittests/Support/Path.cpp +++ b/unittests/Support/Path.cpp @@ -224,6 +224,18 @@ TEST_F(FileSystemTest, TempFiles) { // Make sure Temp1 doesn't exist. ASSERT_NO_ERROR(fs::exists(Twine(TempPath), TempFileExists)); EXPECT_FALSE(TempFileExists); + +#ifdef LLVM_ON_WIN32 + // Path name > 260 chars should get an error. + const char *Path270 = + "abcdefghijklmnopqrstuvwxyz9abcdefghijklmnopqrstuvwxyz8" + "abcdefghijklmnopqrstuvwxyz7abcdefghijklmnopqrstuvwxyz6" + "abcdefghijklmnopqrstuvwxyz5abcdefghijklmnopqrstuvwxyz4" + "abcdefghijklmnopqrstuvwxyz3abcdefghijklmnopqrstuvwxyz2" + "abcdefghijklmnopqrstuvwxyz1abcdefghijklmnopqrstuvwxyz0"; + EXPECT_EQ(fs::unique_file(Twine(Path270), FileDescriptor, TempPath), + windows_error::path_not_found); +#endif } TEST_F(FileSystemTest, DirectoryIteration) { @@ -350,6 +362,7 @@ TEST_F(FileSystemTest, FileMapping) { StringRef Val("hello there"); { fs::mapped_file_region mfr(FileDescriptor, + true, fs::mapped_file_region::readwrite, 4096, 0, diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 563cab0d5d..b0ef67ac88 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -639,11 +639,11 @@ static bool ReadCheckFile(SourceMgr &SM, << ec.message() << '\n'; return true; } - MemoryBuffer *F = File.take(); // If we want to canonicalize whitespace, strip excess whitespace from the // buffer containing the CHECK lines. Remove DOS style line endings. - F = CanonicalizeInputFile(F, NoCanonicalizeWhiteSpace); + MemoryBuffer *F = + CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace); SM.AddNewSourceBuffer(F, SMLoc()); @@ -803,16 +803,16 @@ int main(int argc, char **argv) { << ec.message() << '\n'; return 2; } - MemoryBuffer *F = File.take(); - if (F->getBufferSize() == 0) { + if (File->getBufferSize() == 0) { errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; return 2; } - + // Remove duplicate spaces in the input file if requested. // Remove DOS style line endings. - F = CanonicalizeInputFile(F, NoCanonicalizeWhiteSpace); + MemoryBuffer *F = + CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace); SM.AddNewSourceBuffer(F, SMLoc()); diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index d0f44be69a..3ee1974431 100644 --- a/utils/TableGen/CMakeLists.txt +++ b/utils/TableGen/CMakeLists.txt @@ -33,4 +33,5 @@ add_tablegen(llvm-tblgen LLVM X86DisassemblerTables.cpp X86ModRMFilters.cpp X86RecognizableInstr.cpp + CTagsEmitter.cpp ) diff --git a/utils/TableGen/CTagsEmitter.cpp b/utils/TableGen/CTagsEmitter.cpp new file mode 100644 index 0000000000..8bf777839b --- /dev/null +++ b/utils/TableGen/CTagsEmitter.cpp @@ -0,0 +1,99 @@ +//===- CTagsEmitter.cpp - Generate ctags-compatible index ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tablegen backend emits an index of definitions in ctags(1) format. +// A helper script, utils/TableGen/tdtags, provides an easier-to-use +// interface; run 'tdtags -H' for documentation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ctags-emitter" + +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" +#include <algorithm> +#include <string> +#include <vector> +using namespace llvm; + +namespace llvm { extern SourceMgr SrcMgr; } + +namespace { + +class Tag { +private: + const std::string *Id; + SMLoc Loc; +public: + Tag(const std::string &Name, const SMLoc Location) + : Id(&Name), Loc(Location) {} + int operator<(const Tag &B) const { return *Id < *B.Id; } + void emit(raw_ostream &OS) const { + int BufferID = SrcMgr.FindBufferContainingLoc(Loc); + MemoryBuffer *CurMB = SrcMgr.getBufferInfo(BufferID).Buffer; + const char *BufferName = CurMB->getBufferIdentifier(); + std::pair<unsigned, unsigned> LineAndColumn = SrcMgr.getLineAndColumn(Loc); + OS << *Id << "\t" << BufferName << "\t" << LineAndColumn.first << "\n"; + } +}; + +class CTagsEmitter { +private: + RecordKeeper &Records; +public: + CTagsEmitter(RecordKeeper &R) : Records(R) {} + + void run(raw_ostream &OS); + +private: + static SMLoc locate(const Record *R); +}; + +} // End anonymous namespace. + +SMLoc CTagsEmitter::locate(const Record *R) { + ArrayRef<SMLoc> Locs = R->getLoc(); + if (Locs.empty()) { + SMLoc NullLoc; + return NullLoc; + } + return Locs.front(); +} + +void CTagsEmitter::run(raw_ostream &OS) { + const std::map<std::string, Record *> &Classes = Records.getClasses(); + const std::map<std::string, Record *> &Defs = Records.getDefs(); + std::vector<Tag> Tags; + // Collect tags. + Tags.reserve(Classes.size() + Defs.size()); + for (std::map<std::string, Record *>::const_iterator I = Classes.begin(), + E = Classes.end(); + I != E; ++I) + Tags.push_back(Tag(I->first, locate(I->second))); + for (std::map<std::string, Record *>::const_iterator I = Defs.begin(), + E = Defs.end(); + I != E; ++I) + Tags.push_back(Tag(I->first, locate(I->second))); + // Emit tags. + std::sort(Tags.begin(), Tags.end()); + OS << "!_TAG_FILE_FORMAT\t1\t/original ctags format/\n"; + OS << "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/\n"; + for (std::vector<Tag>::const_iterator I = Tags.begin(), E = Tags.end(); + I != E; ++I) + I->emit(OS); +} + +namespace llvm { + +void EmitCTags(RecordKeeper &RK, raw_ostream &OS) { CTagsEmitter(RK).run(OS); } + +} // End llvm namespace. diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp index 72fb77150c..8e5bb7760f 100644 --- a/utils/TableGen/CodeGenDAGPatterns.cpp +++ b/utils/TableGen/CodeGenDAGPatterns.cpp @@ -57,7 +57,7 @@ EEVT::TypeSet::TypeSet(MVT::SimpleValueType VT, TreePattern &TP) { } -EEVT::TypeSet::TypeSet(const std::vector<MVT::SimpleValueType> &VTList) { +EEVT::TypeSet::TypeSet(ArrayRef<MVT::SimpleValueType> VTList) { assert(!VTList.empty() && "empty list?"); TypeVec.append(VTList.begin(), VTList.end()); @@ -76,7 +76,7 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP, bool (*Pred)(MVT::SimpleValueType), const char *PredicateName) { assert(isCompletelyUnknown()); - const std::vector<MVT::SimpleValueType> &LegalTypes = + ArrayRef<MVT::SimpleValueType> LegalTypes = TP.getDAGPatterns().getTargetInfo().getLegalValueTypes(); if (TP.hasError()) @@ -956,6 +956,40 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N, llvm_unreachable("Invalid ConstraintType!"); } +// Update the node type to match an instruction operand or result as specified +// in the ins or outs lists on the instruction definition. Return true if the +// type was actually changed. +bool TreePatternNode::UpdateNodeTypeFromInst(unsigned ResNo, + Record *Operand, + TreePattern &TP) { + // The 'unknown' operand indicates that types should be inferred from the + // context. + if (Operand->isSubClassOf("unknown_class")) + return false; + + // The Operand class specifies a type directly. + if (Operand->isSubClassOf("Operand")) + return UpdateNodeType(ResNo, getValueType(Operand->getValueAsDef("Type")), + TP); + + // PointerLikeRegClass has a type that is determined at runtime. + if (Operand->isSubClassOf("PointerLikeRegClass")) + return UpdateNodeType(ResNo, MVT::iPTR, TP); + + // Both RegisterClass and RegisterOperand operands derive their types from a + // register class def. + Record *RC = 0; + if (Operand->isSubClassOf("RegisterClass")) + RC = Operand; + else if (Operand->isSubClassOf("RegisterOperand")) + RC = Operand->getValueAsDef("RegClass"); + + assert(RC && "Unknown operand type"); + CodeGenTarget &Tgt = TP.getDAGPatterns().getTargetInfo(); + return UpdateNodeType(ResNo, Tgt.getRegisterClass(RC).getValueTypes(), TP); +} + + //===----------------------------------------------------------------------===// // SDNodeInfo implementation // @@ -1287,8 +1321,18 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) { /// type which should be applied to it. This will infer the type of register /// references from the register file information, for example. /// +/// When Unnamed is set, return the type of a DAG operand with no name, such as +/// the F8RC register class argument in: +/// +/// (COPY_TO_REGCLASS GPR:$src, F8RC) +/// +/// When Unnamed is false, return the type of a named DAG operand such as the +/// GPR:$src operand above. +/// static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, - bool NotRegisters, TreePattern &TP) { + bool NotRegisters, + bool Unnamed, + TreePattern &TP) { // Check to see if this is a register operand. if (R->isSubClassOf("RegisterOperand")) { assert(ResNo == 0 && "Regoperand ref only has one result!"); @@ -1302,6 +1346,13 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, // Check to see if this is a register or a register class. if (R->isSubClassOf("RegisterClass")) { assert(ResNo == 0 && "Regclass ref only has one result!"); + // An unnamed register class represents itself as an i32 immediate, for + // example on a COPY_TO_REGCLASS instruction. + if (Unnamed) + return EEVT::TypeSet(MVT::i32, TP); + + // In a named operand, the register class provides the possible set of + // types. if (NotRegisters) return EEVT::TypeSet(); // Unknown. const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo(); @@ -1327,9 +1378,27 @@ static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo, return EEVT::TypeSet(); } - if (R->isSubClassOf("ValueType") || R->isSubClassOf("CondCode")) { + if (R->isSubClassOf("ValueType")) { assert(ResNo == 0 && "This node only has one result!"); - // Using a VTSDNode or CondCodeSDNode. + // An unnamed VTSDNode represents itself as an MVT::Other immediate. + // + // (sext_inreg GPR:$src, i16) + // ~~~ + if (Unnamed) + return EEVT::TypeSet(MVT::Other, TP); + // With a name, the ValueType simply provides the type of the named + // variable. + // + // (sext_inreg i32:$src, i16) + // ~~~~~~~~ + if (NotRegisters) + return EEVT::TypeSet(); // Unknown. + return EEVT::TypeSet(getValueType(R), TP); + } + + if (R->isSubClassOf("CondCode")) { + assert(ResNo == 0 && "This node only has one result!"); + // Using a CondCodeSDNode. return EEVT::TypeSet(MVT::Other, TP); } @@ -1435,7 +1504,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { bool MadeChange = false; for (unsigned i = 0, e = Types.size(); i != e; ++i) MadeChange |= UpdateNodeType(i, getImplicitType(DI->getDef(), i, - NotRegisters, TP), TP); + NotRegisters, + !hasName(), TP), TP); return MadeChange; } @@ -1498,25 +1568,6 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { return MadeChange; } - if (getOperator()->getName() == "COPY_TO_REGCLASS") { - bool MadeChange = false; - MadeChange |= getChild(0)->ApplyTypeConstraints(TP, NotRegisters); - MadeChange |= getChild(1)->ApplyTypeConstraints(TP, NotRegisters); - - assert(getChild(0)->getNumTypes() == 1 && - getChild(1)->getNumTypes() == 1 && "Unhandled case"); - - // child #1 of COPY_TO_REGCLASS should be a register class. We don't care - // what type it gets, so if it didn't get a concrete type just give it the - // first viable type from the reg class. - if (!getChild(1)->hasTypeSet(0) && - !getChild(1)->getExtType(0).isCompletelyUnknown()) { - MVT::SimpleValueType RCVT = getChild(1)->getExtType(0).getTypeList()[0]; - MadeChange |= getChild(1)->UpdateNodeType(0, RCVT, TP); - } - return MadeChange; - } - if (const CodeGenIntrinsic *Int = getIntrinsicInfo(CDP)) { bool MadeChange = false; @@ -1575,26 +1626,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { // (outs) list of the instruction. // FIXME: Cap at one result so far. unsigned NumResultsToAdd = InstInfo.Operands.NumDefs ? 1 : 0; - for (unsigned ResNo = 0; ResNo != NumResultsToAdd; ++ResNo) { - Record *ResultNode = Inst.getResult(ResNo); - - if (ResultNode->isSubClassOf("PointerLikeRegClass")) { - MadeChange |= UpdateNodeType(ResNo, MVT::iPTR, TP); - } else if (ResultNode->isSubClassOf("RegisterOperand")) { - Record *RegClass = ResultNode->getValueAsDef("RegClass"); - const CodeGenRegisterClass &RC = - CDP.getTargetInfo().getRegisterClass(RegClass); - MadeChange |= UpdateNodeType(ResNo, RC.getValueTypes(), TP); - } else if (ResultNode->isSubClassOf("unknown_class")) { - // Nothing to do. - } else { - assert(ResultNode->isSubClassOf("RegisterClass") && - "Operands should be register classes!"); - const CodeGenRegisterClass &RC = - CDP.getTargetInfo().getRegisterClass(ResultNode); - MadeChange |= UpdateNodeType(ResNo, RC.getValueTypes(), TP); - } - } + for (unsigned ResNo = 0; ResNo != NumResultsToAdd; ++ResNo) + MadeChange |= UpdateNodeTypeFromInst(ResNo, Inst.getResult(ResNo), TP); // If the instruction has implicit defs, we apply the first one as a result. // FIXME: This sucks, it should apply all implicit defs. @@ -1636,30 +1669,44 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { return false; } - MVT::SimpleValueType VT; TreePatternNode *Child = getChild(ChildNo++); unsigned ChildResNo = 0; // Instructions always use res #0 of their op. - if (OperandNode->isSubClassOf("RegisterClass")) { - const CodeGenRegisterClass &RC = - CDP.getTargetInfo().getRegisterClass(OperandNode); - MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP); - } else if (OperandNode->isSubClassOf("RegisterOperand")) { - Record *RegClass = OperandNode->getValueAsDef("RegClass"); - const CodeGenRegisterClass &RC = - CDP.getTargetInfo().getRegisterClass(RegClass); - MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP); - } else if (OperandNode->isSubClassOf("Operand")) { - VT = getValueType(OperandNode->getValueAsDef("Type")); - MadeChange |= Child->UpdateNodeType(ChildResNo, VT, TP); - } else if (OperandNode->isSubClassOf("PointerLikeRegClass")) { - MadeChange |= Child->UpdateNodeType(ChildResNo, MVT::iPTR, TP); - } else if (OperandNode->isSubClassOf("unknown_class")) { - // Nothing to do. - } else - llvm_unreachable("Unknown operand type!"); + // If the operand has sub-operands, they may be provided by distinct + // child patterns, so attempt to match each sub-operand separately. + if (OperandNode->isSubClassOf("Operand")) { + DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo"); + if (unsigned NumArgs = MIOpInfo->getNumArgs()) { + // But don't do that if the whole operand is being provided by + // a single ComplexPattern. + const ComplexPattern *AM = Child->getComplexPatternInfo(CDP); + if (!AM || AM->getNumOperands() < NumArgs) { + // Match first sub-operand against the child we already have. + Record *SubRec = cast<DefInit>(MIOpInfo->getArg(0))->getDef(); + MadeChange |= + Child->UpdateNodeTypeFromInst(ChildResNo, SubRec, TP); + + // And the remaining sub-operands against subsequent children. + for (unsigned Arg = 1; Arg < NumArgs; ++Arg) { + if (ChildNo >= getNumChildren()) { + TP.error("Instruction '" + getOperator()->getName() + + "' expects more operands than were provided."); + return false; + } + Child = getChild(ChildNo++); + + SubRec = cast<DefInit>(MIOpInfo->getArg(Arg))->getDef(); + MadeChange |= + Child->UpdateNodeTypeFromInst(ChildResNo, SubRec, TP); + } + continue; + } + } + } - MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters); + // If we didn't match by pieces above, attempt to match the whole + // operand now. + MadeChange |= Child->UpdateNodeTypeFromInst(ChildResNo, OperandNode, TP); } if (ChildNo != getNumChildren()) { @@ -1668,6 +1715,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { return false; } + for (unsigned i = 0, e = getNumChildren(); i != e; ++i) + MadeChange |= getChild(i)->ApplyTypeConstraints(TP, NotRegisters); return MadeChange; } @@ -1817,6 +1866,16 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){ return Res; } + // ?:$name or just $name. + if (TheInit == UnsetInit::get()) { + if (OpName.empty()) + error("'?' argument requires a name to match with operand list"); + TreePatternNode *Res = new TreePatternNode(TheInit, 1); + Args.push_back(OpName); + Res->setName(OpName); + return Res; + } + if (IntInit *II = dyn_cast<IntInit>(TheInit)) { if (!OpName.empty()) error("Constant int argument should not have a name!"); @@ -2383,6 +2442,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat, I->error("set destination should be a register!"); if (Val->getDef()->isSubClassOf("RegisterClass") || + Val->getDef()->isSubClassOf("ValueType") || Val->getDef()->isSubClassOf("RegisterOperand") || Val->getDef()->isSubClassOf("PointerLikeRegClass")) { if (Dest->getName().empty()) @@ -2599,6 +2659,25 @@ getInstructionsInTree(TreePatternNode *Tree, SmallVectorImpl<Record*> &Instrs) { getInstructionsInTree(Tree->getChild(i), Instrs); } +/// Check the class of a pattern leaf node against the instruction operand it +/// represents. +static bool checkOperandClass(CGIOperandList::OperandInfo &OI, + Record *Leaf) { + if (OI.Rec == Leaf) + return true; + + // Allow direct value types to be used in instruction set patterns. + // The type will be checked later. + if (Leaf->isSubClassOf("ValueType")) + return true; + + // Patterns can also be ComplexPattern instances. + if (Leaf->isSubClassOf("ComplexPattern")) + return true; + + return false; +} + /// ParseInstructions - Parse all of the instructions, inlining and resolving /// any fragments involved. This populates the Instructions list with fully /// resolved instructions. @@ -2708,7 +2787,7 @@ void CodeGenDAGPatterns::ParseInstructions() { I->error("Operand $" + OpName + " should be a set destination: all " "outputs must occur before inputs in operand list!"); - if (CGI.Operands[i].Rec != R) + if (!checkOperandClass(CGI.Operands[i], R)) I->error("Operand $" + OpName + " class mismatch!"); // Remember the return type. @@ -2747,7 +2826,7 @@ void CodeGenDAGPatterns::ParseInstructions() { if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) { Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef(); - if (Op.Rec != InRec && !InRec->isSubClassOf("ComplexPattern")) + if (!checkOperandClass(Op, InRec)) I->error("Operand $" + OpName + "'s register class disagrees" " between the operand and pattern"); } diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h index 424f02f7ab..7c2fa36741 100644 --- a/utils/TableGen/CodeGenDAGPatterns.h +++ b/utils/TableGen/CodeGenDAGPatterns.h @@ -59,7 +59,7 @@ namespace EEVT { public: TypeSet() {} TypeSet(MVT::SimpleValueType VT, TreePattern &TP); - TypeSet(const std::vector<MVT::SimpleValueType> &VTList); + TypeSet(ArrayRef<MVT::SimpleValueType> VTList); bool isCompletelyUnknown() const { return TypeVec.empty(); } @@ -334,6 +334,7 @@ public: } ~TreePatternNode(); + bool hasName() const { return !Name.empty(); } const std::string &getName() const { return Name; } void setName(StringRef N) { Name.assign(N.begin(), N.end()); } @@ -463,6 +464,11 @@ public: // Higher level manipulation routines. return Types[ResNo].MergeInTypeInfo(EEVT::TypeSet(InTy, TP), TP); } + // Update node type with types inferred from an instruction operand or result + // def from the ins/outs lists. + // Return true if the type changed. + bool UpdateNodeTypeFromInst(unsigned ResNo, Record *Operand, TreePattern &TP); + /// ContainsUnresolvedType - Return true if this tree contains any /// unresolved types. bool ContainsUnresolvedType() const { diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h index a1921a412a..4f2cc28d49 100644 --- a/utils/TableGen/CodeGenRegisters.h +++ b/utils/TableGen/CodeGenRegisters.h @@ -261,7 +261,7 @@ namespace llvm { public: unsigned EnumValue; std::string Namespace; - std::vector<MVT::SimpleValueType> VTs; + SmallVector<MVT::SimpleValueType, 4> VTs; unsigned SpillSize; unsigned SpillAlignment; int CopyCost; @@ -274,7 +274,7 @@ namespace llvm { const std::string &getName() const { return Name; } std::string getQualifiedName() const; - const std::vector<MVT::SimpleValueType> &getValueTypes() const {return VTs;} + ArrayRef<MVT::SimpleValueType> getValueTypes() const {return VTs;} unsigned getNumValueTypes() const { return VTs.size(); } MVT::SimpleValueType getValueTypeNum(unsigned VTNum) const { diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp index 23b79fcddf..c02f0843d6 100644 --- a/utils/TableGen/CodeGenSchedule.cpp +++ b/utils/TableGen/CodeGenSchedule.cpp @@ -88,7 +88,7 @@ struct InstRegexOp : public SetTheory::Operator { /// CodeGenModels ctor interprets machine model records and populates maps. CodeGenSchedModels::CodeGenSchedModels(RecordKeeper &RK, const CodeGenTarget &TGT): - Records(RK), Target(TGT), NumItineraryClasses(0) { + Records(RK), Target(TGT) { Sets.addFieldExpander("InstRW", "Instrs"); @@ -217,7 +217,7 @@ void CodeGenSchedModels::collectSchedRW() { for (CodeGenTarget::inst_iterator I = Target.inst_begin(), E = Target.inst_end(); I != E; ++I) { Record *SchedDef = (*I)->TheDef; - if (!SchedDef->isSubClassOf("Sched")) + if (SchedDef->isValueUnset("SchedRW")) continue; RecVec RWs = SchedDef->getValueAsListOfDefs("SchedRW"); for (RecIter RWI = RWs.begin(), RWE = RWs.end(); RWI != RWE; ++RWI) { @@ -502,40 +502,25 @@ void CodeGenSchedModels::collectSchedClasses() { // NoItinerary is always the first class at Idx=0 SchedClasses.resize(1); - SchedClasses.back().Name = "NoItinerary"; + SchedClasses.back().Index = 0; + SchedClasses.back().Name = "NoInstrModel"; + SchedClasses.back().ItinClassDef = Records.getDef("NoItinerary"); SchedClasses.back().ProcIndices.push_back(0); - SchedClassIdxMap[SchedClasses.back().Name] = 0; - // Gather and sort all itinerary classes used by instruction descriptions. - RecVec ItinClassList; + // Create a SchedClass for each unique combination of itinerary class and + // SchedRW list. for (CodeGenTarget::inst_iterator I = Target.inst_begin(), E = Target.inst_end(); I != E; ++I) { Record *ItinDef = (*I)->TheDef->getValueAsDef("Itinerary"); - // Map a new SchedClass with no index. - if (!SchedClassIdxMap.count(ItinDef->getName())) { - SchedClassIdxMap[ItinDef->getName()] = 0; - ItinClassList.push_back(ItinDef); - } - } - // Assign each itinerary class unique number, skipping NoItinerary==0 - NumItineraryClasses = ItinClassList.size(); - std::sort(ItinClassList.begin(), ItinClassList.end(), LessRecord()); - for (unsigned i = 0, N = NumItineraryClasses; i < N; i++) { - Record *ItinDef = ItinClassList[i]; - SchedClassIdxMap[ItinDef->getName()] = SchedClasses.size(); - SchedClasses.push_back(CodeGenSchedClass(ItinDef)); - } - // Infer classes from SchedReadWrite resources listed for each - // instruction definition that inherits from class Sched. - for (CodeGenTarget::inst_iterator I = Target.inst_begin(), - E = Target.inst_end(); I != E; ++I) { - if (!(*I)->TheDef->isSubClassOf("Sched")) - continue; IdxVec Writes, Reads; - findRWs((*I)->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads); + if (!(*I)->TheDef->isValueUnset("SchedRW")) + findRWs((*I)->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads); + // ProcIdx == 0 indicates the class applies to all processors. IdxVec ProcIndices(1, 0); - addSchedClass(Writes, Reads, ProcIndices); + + unsigned SCIdx = addSchedClass(ItinDef, Writes, Reads, ProcIndices); + InstrClassMap[(*I)->TheDef] = SCIdx; } // Create classes for InstRW defs. RecVec InstRWDefs = Records.getAllDerivedDefinitions("InstRW"); @@ -549,68 +534,70 @@ void CodeGenSchedModels::collectSchedClasses() { DEBUG(EnableDump = true); if (!EnableDump) return; + for (CodeGenTarget::inst_iterator I = Target.inst_begin(), E = Target.inst_end(); I != E; ++I) { - Record *SchedDef = (*I)->TheDef; + std::string InstName = (*I)->TheDef->getName(); - if (SchedDef->isSubClassOf("Sched")) { + unsigned SCIdx = InstrClassMap.lookup((*I)->TheDef); + if (!SCIdx) { + dbgs() << "No machine model for " << (*I)->TheDef->getName() << '\n'; + continue; + } + CodeGenSchedClass &SC = getSchedClass(SCIdx); + if (SC.ProcIndices[0] != 0) + PrintFatalError((*I)->TheDef->getLoc(), "Instruction's sched class " + "must not be subtarget specific."); + + IdxVec ProcIndices; + if (SC.ItinClassDef->getName() != "NoItinerary") { + ProcIndices.push_back(0); + dbgs() << "Itinerary for " << InstName << ": " + << SC.ItinClassDef->getName() << '\n'; + } + if (!SC.Writes.empty()) { + ProcIndices.push_back(0); + dbgs() << "SchedRW machine model for " << InstName; + for (IdxIter WI = SC.Writes.begin(), WE = SC.Writes.end(); WI != WE; ++WI) + dbgs() << " " << SchedWrites[*WI].Name; + for (IdxIter RI = SC.Reads.begin(), RE = SC.Reads.end(); RI != RE; ++RI) + dbgs() << " " << SchedReads[*RI].Name; + dbgs() << '\n'; + } + const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs; + for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); + RWI != RWE; ++RWI) { + const CodeGenProcModel &ProcModel = + getProcModel((*RWI)->getValueAsDef("SchedModel")); + ProcIndices.push_back(ProcModel.Index); + dbgs() << "InstRW on " << ProcModel.ModelName << " for " << InstName; IdxVec Writes; IdxVec Reads; - findRWs((*I)->TheDef->getValueAsListOfDefs("SchedRW"), Writes, Reads); - dbgs() << "SchedRW machine model for " << InstName; + findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"), + Writes, Reads); for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI) dbgs() << " " << SchedWrites[*WI].Name; for (IdxIter RI = Reads.begin(), RE = Reads.end(); RI != RE; ++RI) dbgs() << " " << SchedReads[*RI].Name; dbgs() << '\n'; } - unsigned SCIdx = InstrClassMap.lookup((*I)->TheDef); - if (SCIdx) { - const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs; - for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); - RWI != RWE; ++RWI) { - const CodeGenProcModel &ProcModel = - getProcModel((*RWI)->getValueAsDef("SchedModel")); - dbgs() << "InstRW on " << ProcModel.ModelName << " for " << InstName; - IdxVec Writes; - IdxVec Reads; - findRWs((*RWI)->getValueAsListOfDefs("OperandReadWrites"), - Writes, Reads); - for (IdxIter WI = Writes.begin(), WE = Writes.end(); WI != WE; ++WI) - dbgs() << " " << SchedWrites[*WI].Name; - for (IdxIter RI = Reads.begin(), RE = Reads.end(); RI != RE; ++RI) - dbgs() << " " << SchedReads[*RI].Name; - dbgs() << '\n'; - } - continue; - } - if (!SchedDef->isSubClassOf("Sched") - && (SchedDef->getValueAsDef("Itinerary")->getName() == "NoItinerary")) { - dbgs() << "No machine model for " << (*I)->TheDef->getName() << '\n'; + for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(), + PE = ProcModels.end(); PI != PE; ++PI) { + if (!std::count(ProcIndices.begin(), ProcIndices.end(), PI->Index)) + dbgs() << "No machine model for " << (*I)->TheDef->getName() + << " on processor " << PI->ModelName << '\n'; } } } -unsigned CodeGenSchedModels::getSchedClassIdx( - const RecVec &RWDefs) const { - - IdxVec Writes, Reads; - findRWs(RWDefs, Writes, Reads); - return findSchedClassIdx(Writes, Reads); -} - /// Find an SchedClass that has been inferred from a per-operand list of /// SchedWrites and SchedReads. -unsigned CodeGenSchedModels::findSchedClassIdx(const IdxVec &Writes, +unsigned CodeGenSchedModels::findSchedClassIdx(Record *ItinClassDef, + const IdxVec &Writes, const IdxVec &Reads) const { for (SchedClassIter I = schedClassBegin(), E = schedClassEnd(); I != E; ++I) { - // Classes with InstRWs may have the same Writes/Reads as a class originally - // produced by a SchedRW definition. We need to be able to recover the - // original class index for processors that don't match any InstRWs. - if (I->ItinClassDef || !I->InstRWs.empty()) - continue; - - if (I->Writes == Writes && I->Reads == Reads) { + if (I->ItinClassDef == ItinClassDef + && I->Writes == Writes && I->Reads == Reads) { return I - schedClassBegin(); } } @@ -621,29 +608,17 @@ unsigned CodeGenSchedModels::findSchedClassIdx(const IdxVec &Writes, unsigned CodeGenSchedModels::getSchedClassIdx( const CodeGenInstruction &Inst) const { - unsigned SCIdx = InstrClassMap.lookup(Inst.TheDef); - if (SCIdx) - return SCIdx; - - // If this opcode isn't mapped by the subtarget fallback to the instruction - // definition's SchedRW or ItinDef values. - if (Inst.TheDef->isSubClassOf("Sched")) { - RecVec RWs = Inst.TheDef->getValueAsListOfDefs("SchedRW"); - return getSchedClassIdx(RWs); - } - Record *ItinDef = Inst.TheDef->getValueAsDef("Itinerary"); - assert(SchedClassIdxMap.count(ItinDef->getName()) && "missing ItinClass"); - unsigned Idx = SchedClassIdxMap.lookup(ItinDef->getName()); - assert(Idx <= NumItineraryClasses && "bad ItinClass index"); - return Idx; + return InstrClassMap.lookup(Inst.TheDef); } std::string CodeGenSchedModels::createSchedClassName( - const IdxVec &OperWrites, const IdxVec &OperReads) { + Record *ItinClassDef, const IdxVec &OperWrites, const IdxVec &OperReads) { std::string Name; + if (ItinClassDef && ItinClassDef->getName() != "NoItinerary") + Name = ItinClassDef->getName(); for (IdxIter WI = OperWrites.begin(), WE = OperWrites.end(); WI != WE; ++WI) { - if (WI != OperWrites.begin()) + if (!Name.empty()) Name += '_'; Name += SchedWrites[*WI].Name; } @@ -665,17 +640,18 @@ std::string CodeGenSchedModels::createSchedClassName(const RecVec &InstDefs) { return Name; } -/// Add an inferred sched class from a per-operand list of SchedWrites and -/// SchedReads. ProcIndices contains the set of IDs of processors that may -/// utilize this class. -unsigned CodeGenSchedModels::addSchedClass(const IdxVec &OperWrites, +/// Add an inferred sched class from an itinerary class and per-operand list of +/// SchedWrites and SchedReads. ProcIndices contains the set of IDs of +/// processors that may utilize this class. +unsigned CodeGenSchedModels::addSchedClass(Record *ItinClassDef, + const IdxVec &OperWrites, const IdxVec &OperReads, const IdxVec &ProcIndices) { assert(!ProcIndices.empty() && "expect at least one ProcIdx"); - unsigned Idx = findSchedClassIdx(OperWrites, OperReads); - if (Idx) { + unsigned Idx = findSchedClassIdx(ItinClassDef, OperWrites, OperReads); + if (Idx || SchedClasses[0].isKeyEqual(ItinClassDef, OperWrites, OperReads)) { IdxVec PI; std::set_union(SchedClasses[Idx].ProcIndices.begin(), SchedClasses[Idx].ProcIndices.end(), @@ -687,7 +663,9 @@ unsigned CodeGenSchedModels::addSchedClass(const IdxVec &OperWrites, Idx = SchedClasses.size(); SchedClasses.resize(Idx+1); CodeGenSchedClass &SC = SchedClasses.back(); - SC.Name = createSchedClassName(OperWrites, OperReads); + SC.Index = Idx; + SC.Name = createSchedClassName(ItinClassDef, OperWrites, OperReads); + SC.ItinClassDef = ItinClassDef; SC.Writes = OperWrites; SC.Reads = OperReads; SC.ProcIndices = ProcIndices; @@ -709,19 +687,10 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { PrintFatalError(InstRWDef->getLoc(), "No matching instruction opcodes"); for (RecIter I = InstDefs->begin(), E = InstDefs->end(); I != E; ++I) { - unsigned SCIdx = 0; InstClassMapTy::const_iterator Pos = InstrClassMap.find(*I); - if (Pos != InstrClassMap.end()) - SCIdx = Pos->second; - else { - // This instruction has not been mapped yet. Get the original class. All - // instructions in the same InstrRW class must be from the same original - // class because that is the fall-back class for other processors. - Record *ItinDef = (*I)->getValueAsDef("Itinerary"); - SCIdx = SchedClassIdxMap.lookup(ItinDef->getName()); - if (!SCIdx && (*I)->isSubClassOf("Sched")) - SCIdx = getSchedClassIdx((*I)->getValueAsListOfDefs("SchedRW")); - } + if (Pos == InstrClassMap.end()) + PrintFatalError((*I)->getLoc(), "No sched class for instruction."); + unsigned SCIdx = Pos->second; unsigned CIdx = 0, CEnd = ClassInstrs.size(); for (; CIdx != CEnd; ++CIdx) { if (ClassInstrs[CIdx].first == SCIdx) @@ -741,7 +710,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { ArrayRef<Record*> InstDefs = ClassInstrs[CIdx].second; // If the all instrs in the current class are accounted for, then leave // them mapped to their old class. - if (SchedClasses[OldSCIdx].InstRWs.size() == InstDefs.size()) { + if (OldSCIdx && SchedClasses[OldSCIdx].InstRWs.size() == InstDefs.size()) { assert(SchedClasses[OldSCIdx].ProcIndices[0] == 0 && "expected a generic SchedClass"); continue; @@ -749,6 +718,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { unsigned SCIdx = SchedClasses.size(); SchedClasses.resize(SCIdx+1); CodeGenSchedClass &SC = SchedClasses.back(); + SC.Index = SCIdx; SC.Name = createSchedClassName(InstDefs); // Preserve ItinDef and Writes/Reads for processors without an InstRW entry. SC.ItinClassDef = SchedClasses[OldSCIdx].ItinClassDef; @@ -780,32 +750,48 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) { } } +// True if collectProcItins found anything. +bool CodeGenSchedModels::hasItineraries() const { + for (CodeGenSchedModels::ProcIter PI = procModelBegin(), PE = procModelEnd(); + PI != PE; ++PI) { + if (PI->hasItineraries()) + return true; + } + return false; +} + // Gather the processor itineraries. void CodeGenSchedModels::collectProcItins() { for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(), PE = ProcModels.end(); PI != PE; ++PI) { CodeGenProcModel &ProcModel = *PI; - RecVec ItinRecords = ProcModel.ItinsDef->getValueAsListOfDefs("IID"); - // Skip empty itinerary. - if (ItinRecords.empty()) + if (!ProcModel.hasItineraries()) continue; - ProcModel.ItinDefList.resize(NumItineraryClasses+1); + RecVec ItinRecords = ProcModel.ItinsDef->getValueAsListOfDefs("IID"); + assert(!ItinRecords.empty() && "ProcModel.hasItineraries is incorrect"); + + // Populate ItinDefList with Itinerary records. + ProcModel.ItinDefList.resize(NumInstrSchedClasses); // Insert each itinerary data record in the correct position within // the processor model's ItinDefList. for (unsigned i = 0, N = ItinRecords.size(); i < N; i++) { Record *ItinData = ItinRecords[i]; Record *ItinDef = ItinData->getValueAsDef("TheClass"); - if (!SchedClassIdxMap.count(ItinDef->getName())) { + bool FoundClass = false; + for (SchedClassIter SCI = schedClassBegin(), SCE = schedClassEnd(); + SCI != SCE; ++SCI) { + // Multiple SchedClasses may share an itinerary. Update all of them. + if (SCI->ItinClassDef == ItinDef) { + ProcModel.ItinDefList[SCI->Index] = ItinData; + FoundClass = true; + } + } + if (!FoundClass) { DEBUG(dbgs() << ProcModel.ItinsDef->getName() - << " has unused itinerary class " << ItinDef->getName() << '\n'); - continue; + << " missing class for itinerary " << ItinDef->getName() << '\n'); } - assert(SchedClassIdxMap.count(ItinDef->getName()) && "missing ItinClass"); - unsigned Idx = SchedClassIdxMap.lookup(ItinDef->getName()); - assert(Idx <= NumItineraryClasses && "bad ItinClass index"); - ProcModel.ItinDefList[Idx] = ItinData; } // Check for missing itinerary entries. assert(!ProcModel.ItinDefList[0] && "NoItinerary class can't have rec"); @@ -839,13 +825,17 @@ void CodeGenSchedModels::collectProcItinRW() { /// Infer new classes from existing classes. In the process, this may create new /// SchedWrites from sequences of existing SchedWrites. void CodeGenSchedModels::inferSchedClasses() { + DEBUG(dbgs() << NumInstrSchedClasses << " instr sched classes.\n"); + // Visit all existing classes and newly created classes. for (unsigned Idx = 0; Idx != SchedClasses.size(); ++Idx) { + assert(SchedClasses[Idx].Index == Idx && "bad SCIdx"); + if (SchedClasses[Idx].ItinClassDef) inferFromItinClass(SchedClasses[Idx].ItinClassDef, Idx); - else if (!SchedClasses[Idx].InstRWs.empty()) + if (!SchedClasses[Idx].InstRWs.empty()) inferFromInstRWs(Idx); - else { + if (!SchedClasses[Idx].Writes.empty()) { inferFromRW(SchedClasses[Idx].Writes, SchedClasses[Idx].Reads, Idx, SchedClasses[Idx].ProcIndices); } @@ -1042,11 +1032,13 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions, // Populate IntersectingVariants with any variants or aliased sequences of the // given SchedRW whose processor indices and predicates are not mutually -// exclusive with the given transition, +// exclusive with the given transition. void PredTransitions::getIntersectingVariants( const CodeGenSchedRW &SchedRW, unsigned TransIdx, std::vector<TransVariant> &IntersectingVariants) { + bool GenericRW = false; + std::vector<TransVariant> Variants; if (SchedRW.HasVariants) { unsigned VarProcIdx = 0; @@ -1058,6 +1050,8 @@ void PredTransitions::getIntersectingVariants( const RecVec VarDefs = SchedRW.TheDef->getValueAsListOfDefs("Variants"); for (RecIter RI = VarDefs.begin(), RE = VarDefs.end(); RI != RE; ++RI) Variants.push_back(TransVariant(*RI, SchedRW.Index, VarProcIdx, 0)); + if (VarProcIdx == 0) + GenericRW = true; } for (RecIter AI = SchedRW.Aliases.begin(), AE = SchedRW.Aliases.end(); AI != AE; ++AI) { @@ -1081,6 +1075,8 @@ void PredTransitions::getIntersectingVariants( Variants.push_back( TransVariant(AliasRW.TheDef, SchedRW.Index, AliasProcIdx, 0)); } + if (AliasProcIdx == 0) + GenericRW = true; } for (unsigned VIdx = 0, VEnd = Variants.size(); VIdx != VEnd; ++VIdx) { TransVariant &Variant = Variants[VIdx]; @@ -1118,6 +1114,10 @@ void PredTransitions::getIntersectingVariants( TransVec.push_back(TransVec[TransIdx]); } } + if (GenericRW && IntersectingVariants.empty()) { + PrintFatalError(SchedRW.TheDef->getLoc(), "No variant of this type has " + "a matching predicate on any processor"); + } } // Push the Reads/Writes selected by this variant onto the PredTransition @@ -1215,10 +1215,6 @@ void PredTransitions::substituteVariantOperand( // This will push a copies of TransVec[TransIdx] on the back of TransVec. std::vector<TransVariant> IntersectingVariants; getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants); - if (IntersectingVariants.empty()) - PrintFatalError(SchedRW.TheDef->getLoc(), - "No variant of this type has " - "a matching predicate on any processor"); // Now expand each variant on top of its copy of the transition. for (std::vector<TransVariant>::const_iterator IVI = IntersectingVariants.begin(), @@ -1295,8 +1291,8 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions, IdxVec ProcIndices(I->ProcIndices.begin(), I->ProcIndices.end()); CodeGenSchedTransition SCTrans; SCTrans.ToClassIdx = - SchedModels.addSchedClass(OperWritesVariant, OperReadsVariant, - ProcIndices); + SchedModels.addSchedClass(/*ItinClassDef=*/0, OperWritesVariant, + OperReadsVariant, ProcIndices); SCTrans.ProcIndices = ProcIndices; // The final PredTerm is unique set of predicates guarding the transition. RecVec Preds; @@ -1318,7 +1314,7 @@ void CodeGenSchedModels::inferFromRW(const IdxVec &OperWrites, const IdxVec &OperReads, unsigned FromClassIdx, const IdxVec &ProcIndices) { - DEBUG(dbgs() << "INFER RW: "); + DEBUG(dbgs() << "INFER RW proc("; dumpIdxVec(ProcIndices); dbgs() << ") "); // Create a seed transition with an empty PredTerm and the expanded sequences // of SchedWrites for the current SchedClass. @@ -1542,6 +1538,20 @@ Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind, ProcUnitDef = *RI; } } + RecVec ProcResGroups = Records.getAllDerivedDefinitions("ProcResGroup"); + for (RecIter RI = ProcResGroups.begin(), RE = ProcResGroups.end(); + RI != RE; ++RI) { + + if (*RI == ProcResKind + && (*RI)->getValueAsDef("SchedModel") == PM.ModelDef) { + if (ProcUnitDef) { + PrintFatalError((*RI)->getLoc(), + "Multiple ProcessorResourceUnits associated with " + + ProcResKind->getName()); + } + ProcUnitDef = *RI; + } + } if (!ProcUnitDef) { PrintFatalError(ProcResKind->getLoc(), "No ProcessorResources associated with " @@ -1563,6 +1573,9 @@ void CodeGenSchedModels::addProcResource(Record *ProcResKind, return; PM.ProcResourceDefs.push_back(ProcResUnits); + if (ProcResUnits->isSubClassOf("ProcResGroup")) + return; + if (!ProcResUnits->getValueInit("Super")->isComplete()) return; @@ -1625,7 +1638,7 @@ void CodeGenSchedRW::dump() const { } void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const { - dbgs() << "SCHEDCLASS " << Name << '\n' + dbgs() << "SCHEDCLASS " << Index << ":" << Name << '\n' << " Writes: "; for (unsigned i = 0, N = Writes.size(); i < N; ++i) { SchedModels->getSchedWrite(Writes[i]).dump(); @@ -1643,6 +1656,13 @@ void CodeGenSchedClass::dump(const CodeGenSchedModels* SchedModels) const { } } dbgs() << "\n ProcIdx: "; dumpIdxVec(ProcIndices); dbgs() << '\n'; + if (!Transitions.empty()) { + dbgs() << "\n Transitions for Proc "; + for (std::vector<CodeGenSchedTransition>::const_iterator + TI = Transitions.begin(), TE = Transitions.end(); TI != TE; ++TI) { + dumpIdxVec(TI->ProcIndices); + } + } } void PredTransitions::dump() const { diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h index 78f115ecd8..e5b9118fb0 100644 --- a/utils/TableGen/CodeGenSchedule.h +++ b/utils/TableGen/CodeGenSchedule.h @@ -125,6 +125,7 @@ struct CodeGenSchedTransition { /// itinerary class. Each inherits the processor index from the ItinRW record /// that mapped the itinerary class to the variant Writes or Reads. struct CodeGenSchedClass { + unsigned Index; std::string Name; Record *ItinClassDef; @@ -141,12 +142,16 @@ struct CodeGenSchedClass { // off to join another inferred class. RecVec InstRWs; - CodeGenSchedClass(): ItinClassDef(0) {} - CodeGenSchedClass(Record *rec): ItinClassDef(rec) { - Name = rec->getName(); - ProcIndices.push_back(0); + CodeGenSchedClass(): Index(0), ItinClassDef(0) {} + + bool isKeyEqual(Record *IC, const IdxVec &W, const IdxVec &R) { + return ItinClassDef == IC && Writes == W && Reads == R; } + // Is this class generated from a variants if existing classes? Instructions + // are never mapped directly to inferred scheduling classes. + bool isInferred() const { return !ItinClassDef; } + #ifndef NDEBUG void dump(const CodeGenSchedModels *SchedModels) const; #endif @@ -189,11 +194,16 @@ struct CodeGenProcModel { // Per-operand machine model resources associated with this processor. RecVec ProcResourceDefs; + RecVec ProcResGroupDefs; CodeGenProcModel(unsigned Idx, const std::string &Name, Record *MDef, Record *IDef) : Index(Idx), ModelName(Name), ModelDef(MDef), ItinsDef(IDef) {} + bool hasItineraries() const { + return !ItinsDef->getValueAsListOfDefs("IID").empty(); + } + bool hasInstrSchedModel() const { return !WriteResDefs.empty() || !ItinRWDefs.empty(); } @@ -227,24 +237,11 @@ class CodeGenSchedModels { // List of unique SchedClasses. std::vector<CodeGenSchedClass> SchedClasses; - // Map SchedClass name to itinerary index. - // These are either explicit itinerary classes or classes implied by - // instruction definitions with SchedReadWrite lists. - StringMap<unsigned> SchedClassIdxMap; - - // SchedClass indices 1 up to and including NumItineraryClasses identify - // itinerary classes that are explicitly used for this target's instruction - // definitions. NoItinerary always has index 0 regardless of whether it is - // explicitly referenced. - // - // Any implied SchedClass has an index greater than NumItineraryClasses. - unsigned NumItineraryClasses; - // Any inferred SchedClass has an index greater than NumInstrSchedClassses. unsigned NumInstrSchedClasses; - // Map Instruction to SchedClass index. Only for Instructions mentioned in - // InstRW records. + // Map each instruction to its unique SchedClass index considering the + // combination of it's itinerary class, SchedRW list, and InstRW records. typedef DenseMap<Record*, unsigned> InstClassMapTy; InstClassMapTy InstrClassMap; @@ -280,6 +277,9 @@ public: ProcIter procModelBegin() const { return ProcModels.begin(); } ProcIter procModelEnd() const { return ProcModels.end(); } + // Return true if any processors have itineraries. + bool hasItineraries() const; + // Get a SchedWrite from its index. const CodeGenSchedRW &getSchedWrite(unsigned Idx) const { assert(Idx < SchedWrites.size() && "bad SchedWrite index"); @@ -311,16 +311,6 @@ public: // Return true if the given write record is referenced by a ReadAdvance. bool hasReadOfWrite(Record *WriteDef) const; - // Check if any instructions are assigned to an explicit itinerary class other - // than NoItinerary. - bool hasItineraryClasses() const { return NumItineraryClasses > 0; } - - // Return the number of itinerary classes in use by this target's instruction - // descriptions, not including "NoItinerary". - unsigned numItineraryClasses() const { - return NumItineraryClasses; - } - // Get a SchedClass from its index. CodeGenSchedClass &getSchedClass(unsigned Idx) { assert(Idx < SchedClasses.size() && "bad SchedClass index"); @@ -336,28 +326,26 @@ public: // for NoItinerary. unsigned getSchedClassIdx(const CodeGenInstruction &Inst) const; - unsigned getSchedClassIdx(const RecVec &RWDefs) const; - - unsigned getSchedClassIdxForItin(const Record *ItinDef) { - return SchedClassIdxMap[ItinDef->getName()]; - } - typedef std::vector<CodeGenSchedClass>::const_iterator SchedClassIter; SchedClassIter schedClassBegin() const { return SchedClasses.begin(); } SchedClassIter schedClassEnd() const { return SchedClasses.end(); } + unsigned numInstrSchedClasses() const { return NumInstrSchedClasses; } + void findRWs(const RecVec &RWDefs, IdxVec &Writes, IdxVec &Reads) const; void findRWs(const RecVec &RWDefs, IdxVec &RWs, bool IsRead) const; void expandRWSequence(unsigned RWIdx, IdxVec &RWSeq, bool IsRead) const; void expandRWSeqForProc(unsigned RWIdx, IdxVec &RWSeq, bool IsRead, const CodeGenProcModel &ProcModel) const; - unsigned addSchedClass(const IdxVec &OperWrites, const IdxVec &OperReads, - const IdxVec &ProcIndices); + unsigned addSchedClass(Record *ItinDef, const IdxVec &OperWrites, + const IdxVec &OperReads, const IdxVec &ProcIndices); unsigned findOrInsertRW(ArrayRef<unsigned> Seq, bool IsRead); - unsigned findSchedClassIdx(const IdxVec &Writes, const IdxVec &Reads) const; + unsigned findSchedClassIdx(Record *ItinClassDef, + const IdxVec &Writes, + const IdxVec &Reads) const; Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM) const; @@ -375,7 +363,8 @@ private: void collectSchedClasses(); - std::string createSchedClassName(const IdxVec &OperWrites, + std::string createSchedClassName(Record *ItinClassDef, + const IdxVec &OperWrites, const IdxVec &OperReads); std::string createSchedClassName(const RecVec &InstDefs); void createInstRWClass(Record *InstRWDef); diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index dd3f6e29fd..8b292b9572 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -229,7 +229,7 @@ getRegisterVTs(Record *R) const { for (unsigned i = 0, e = RCs.size(); i != e; ++i) { const CodeGenRegisterClass &RC = *RCs[i]; if (RC.contains(Reg)) { - const std::vector<MVT::SimpleValueType> &InVTs = RC.getValueTypes(); + ArrayRef<MVT::SimpleValueType> InVTs = RC.getValueTypes(); Result.insert(Result.end(), InVTs.begin(), InVTs.end()); } } diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h index 4a1c6d8fcb..6271443029 100644 --- a/utils/TableGen/CodeGenTarget.h +++ b/utils/TableGen/CodeGenTarget.h @@ -68,7 +68,7 @@ class CodeGenTarget { mutable DenseMap<const Record*, CodeGenInstruction*> Instructions; mutable CodeGenRegBank *RegBank; mutable std::vector<Record*> RegAltNameIndices; - mutable std::vector<MVT::SimpleValueType> LegalValueTypes; + mutable SmallVector<MVT::SimpleValueType, 8> LegalValueTypes; void ReadRegAltNameIndices() const; void ReadInstructions() const; void ReadLegalValueTypes() const; @@ -129,7 +129,7 @@ public: /// specified physical register. std::vector<MVT::SimpleValueType> getRegisterVTs(Record *R) const; - const std::vector<MVT::SimpleValueType> &getLegalValueTypes() const { + ArrayRef<MVT::SimpleValueType> getLegalValueTypes() const { if (LegalValueTypes.empty()) ReadLegalValueTypes(); return LegalValueTypes; } @@ -137,7 +137,7 @@ public: /// isLegalValueType - Return true if the specified value type is natively /// supported by the target (i.e. there are registers that directly hold it). bool isLegalValueType(MVT::SimpleValueType VT) const { - const std::vector<MVT::SimpleValueType> &LegalVTs = getLegalValueTypes(); + ArrayRef<MVT::SimpleValueType> LegalVTs = getLegalValueTypes(); for (unsigned i = 0, e = LegalVTs.size(); i != e; ++i) if (LegalVTs[i] == VT) return true; return false; diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp index 38ffa30ec8..ed41631456 100644 --- a/utils/TableGen/DAGISelMatcherGen.cpp +++ b/utils/TableGen/DAGISelMatcherGen.cpp @@ -211,6 +211,12 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) { return AddMatcher(new CheckIntegerMatcher(II->getValue())); } + // An UnsetInit represents a named node without any constraints. + if (N->getLeafValue() == UnsetInit::get()) { + assert(N->hasName() && "Unnamed ? leaf"); + return; + } + DefInit *DI = dyn_cast<DefInit>(N->getLeafValue()); if (DI == 0) { errs() << "Unknown leaf kind: " << *N << "\n"; @@ -218,6 +224,17 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) { } Record *LeafRec = DI->getDef(); + + // A ValueType leaf node can represent a register when named, or itself when + // unnamed. + if (LeafRec->isSubClassOf("ValueType")) { + // A named ValueType leaf always matches: (add i32:$a, i32:$b). + if (N->hasName()) + return; + // An unnamed ValueType as in (sext_inreg GPR:$foo, i8). + return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName())); + } + if (// Handle register references. Nothing to do here, they always match. LeafRec->isSubClassOf("RegisterClass") || LeafRec->isSubClassOf("RegisterOperand") || @@ -236,9 +253,6 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) { return; } - if (LeafRec->isSubClassOf("ValueType")) - return AddMatcher(new CheckValueTypeMatcher(LeafRec->getName())); - if (LeafRec->isSubClassOf("CondCode")) return AddMatcher(new CheckCondCodeMatcher(LeafRec->getName())); @@ -734,20 +748,33 @@ EmitResultInstructionAsOperand(const TreePatternNode *N, continue; } - const TreePatternNode *Child = N->getChild(ChildNo); - // Otherwise this is a normal operand or a predicate operand without // 'execute always'; emit it. - unsigned BeforeAddingNumOps = InstOps.size(); - EmitResultOperand(Child, InstOps); - assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands"); - // If the operand is an instruction and it produced multiple results, just - // take the first one. - if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction")) - InstOps.resize(BeforeAddingNumOps+1); + // For operands with multiple sub-operands we may need to emit + // multiple child patterns to cover them all. However, ComplexPattern + // children may themselves emit multiple MI operands. + unsigned NumSubOps = 1; + if (OperandNode->isSubClassOf("Operand")) { + DagInit *MIOpInfo = OperandNode->getValueAsDag("MIOperandInfo"); + if (unsigned NumArgs = MIOpInfo->getNumArgs()) + NumSubOps = NumArgs; + } - ++ChildNo; + unsigned FinalNumOps = InstOps.size() + NumSubOps; + while (InstOps.size() < FinalNumOps) { + const TreePatternNode *Child = N->getChild(ChildNo); + unsigned BeforeAddingNumOps = InstOps.size(); + EmitResultOperand(Child, InstOps); + assert(InstOps.size() > BeforeAddingNumOps && "Didn't add any operands"); + + // If the operand is an instruction and it produced multiple results, just + // take the first one. + if (!Child->isLeaf() && Child->getOperator()->isSubClassOf("Instruction")) + InstOps.resize(BeforeAddingNumOps+1); + + ++ChildNo; + } } // If this node has input glue or explicitly specified input physregs, we diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp index cf225117e1..1b5d90b8bd 100644 --- a/utils/TableGen/RegisterInfoEmitter.cpp +++ b/utils/TableGen/RegisterInfoEmitter.cpp @@ -1031,7 +1031,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target, } // Build a shared array of value types. - SequenceToOffsetTable<std::vector<MVT::SimpleValueType> > VTSeqs; + SequenceToOffsetTable<SmallVector<MVT::SimpleValueType, 4> > VTSeqs; for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) VTSeqs.add(RegisterClasses[rc]->VTs); VTSeqs.layout(); diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp index 93eef868ea..98892e1144 100644 --- a/utils/TableGen/SubtargetEmitter.cpp +++ b/utils/TableGen/SubtargetEmitter.cpp @@ -87,6 +87,8 @@ class SubtargetEmitter { const CodeGenProcModel &ProcModel); Record *FindReadAdvance(const CodeGenSchedRW &SchedRead, const CodeGenProcModel &ProcModel); + void ExpandProcResources(RecVec &PRVec, std::vector<int64_t> &Cycles, + const CodeGenProcModel &ProcModel); void GenSchedClassTables(const CodeGenProcModel &ProcModel, SchedClassTables &SchedTables); void EmitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS); @@ -445,17 +447,15 @@ EmitStageAndOperandCycleData(raw_ostream &OS, // If this processor defines no itineraries, then leave the itinerary list // empty. std::vector<InstrItinerary> &ItinList = ProcItinLists.back(); - if (ProcModel.ItinDefList.empty()) + if (!ProcModel.hasItineraries()) continue; - // Reserve index==0 for NoItinerary. - ItinList.resize(SchedModels.numItineraryClasses()+1); - const std::string &Name = ProcModel.ItinsDef->getName(); - // For each itinerary data - for (unsigned SchedClassIdx = 0, - SchedClassEnd = ProcModel.ItinDefList.size(); + ItinList.resize(SchedModels.numInstrSchedClasses()); + assert(ProcModel.ItinDefList.size() == ItinList.size() && "bad Itins"); + + for (unsigned SchedClassIdx = 0, SchedClassEnd = ItinList.size(); SchedClassIdx < SchedClassEnd; ++SchedClassIdx) { // Next itinerary data @@ -631,13 +631,31 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) { Record *PRDef = ProcModel.ProcResourceDefs[i]; - // Find the SuperIdx - unsigned SuperIdx = 0; Record *SuperDef = 0; - if (PRDef->getValueInit("Super")->isComplete()) { - SuperDef = - SchedModels.findProcResUnits(PRDef->getValueAsDef("Super"), ProcModel); - SuperIdx = ProcModel.getProcResourceIdx(SuperDef); + unsigned SuperIdx = 0; + unsigned NumUnits = 0; + bool IsBuffered = true; + if (PRDef->isSubClassOf("ProcResGroup")) { + RecVec ResUnits = PRDef->getValueAsListOfDefs("Resources"); + for (RecIter RUI = ResUnits.begin(), RUE = ResUnits.end(); + RUI != RUE; ++RUI) { + if (!NumUnits) + IsBuffered = (*RUI)->getValueAsBit("Buffered"); + else if(IsBuffered != (*RUI)->getValueAsBit("Buffered")) + PrintFatalError(PRDef->getLoc(), + "Mixing buffered and unbuffered resources."); + NumUnits += (*RUI)->getValueAsInt("NumUnits"); + } + } + else { + // Find the SuperIdx + if (PRDef->getValueInit("Super")->isComplete()) { + SuperDef = SchedModels.findProcResUnits( + PRDef->getValueAsDef("Super"), ProcModel); + SuperIdx = ProcModel.getProcResourceIdx(SuperDef); + } + NumUnits = PRDef->getValueAsInt("NumUnits"); + IsBuffered = PRDef->getValueAsBit("Buffered"); } // Emit the ProcResourceDesc if (i+1 == e) @@ -645,8 +663,8 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel, OS << " {DBGFIELD(\"" << PRDef->getName() << "\") "; if (PRDef->getName().size() < 15) OS.indent(15 - PRDef->getName().size()); - OS << PRDef->getValueAsInt("NumUnits") << ", " << SuperIdx << ", " - << PRDef->getValueAsBit("Buffered") << "}" << Sep << " // #" << i+1; + OS << NumUnits << ", " << SuperIdx << ", " + << IsBuffered << "}" << Sep << " // #" << i+1; if (SuperDef) OS << ", Super=" << SuperDef->getName(); OS << "\n"; @@ -763,6 +781,51 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead, return ResDef; } +// Expand an explicit list of processor resources into a full list of implied +// resource groups that cover them. +// +// FIXME: Effectively consider a super-resource a group that include all of its +// subresources to allow mixing and matching super-resources and groups. +// +// FIXME: Warn if two overlapping groups don't have a common supergroup. +void SubtargetEmitter::ExpandProcResources(RecVec &PRVec, + std::vector<int64_t> &Cycles, + const CodeGenProcModel &ProcModel) { + // Default to 1 resource cycle. + Cycles.resize(PRVec.size(), 1); + for (unsigned i = 0, e = PRVec.size(); i != e; ++i) { + RecVec SubResources; + if (PRVec[i]->isSubClassOf("ProcResGroup")) { + SubResources = PRVec[i]->getValueAsListOfDefs("Resources"); + std::sort(SubResources.begin(), SubResources.end(), LessRecord()); + } + else { + SubResources.push_back(PRVec[i]); + } + for (RecIter PRI = ProcModel.ProcResourceDefs.begin(), + PRE = ProcModel.ProcResourceDefs.end(); + PRI != PRE; ++PRI) { + if (*PRI == PRVec[i] || !(*PRI)->isSubClassOf("ProcResGroup")) + continue; + RecVec SuperResources = (*PRI)->getValueAsListOfDefs("Resources"); + std::sort(SuperResources.begin(), SuperResources.end(), LessRecord()); + RecIter SubI = SubResources.begin(), SubE = SubResources.end(); + RecIter SuperI = SuperResources.begin(), SuperE = SuperResources.end(); + for ( ; SubI != SubE && SuperI != SuperE; ++SuperI) { + if (*SubI < *SuperI) + break; + else if (*SuperI < *SubI) + continue; + ++SubI; + } + if (SubI == SubE) { + PRVec.push_back(*PRI); + Cycles.push_back(Cycles[i]); + } + } + } +} + // Generate the SchedClass table for this processor and update global // tables. Must be called for each processor in order. void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, @@ -787,7 +850,22 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, SCDesc.ReadAdvanceIdx = 0; // A Variant SchedClass has no resources of its own. - if (!SCI->Transitions.empty()) { + bool HasVariants = false; + for (std::vector<CodeGenSchedTransition>::const_iterator + TI = SCI->Transitions.begin(), TE = SCI->Transitions.end(); + TI != TE; ++TI) { + if (TI->ProcIndices[0] == 0) { + HasVariants = true; + break; + } + IdxIter PIPos = std::find(TI->ProcIndices.begin(), + TI->ProcIndices.end(), ProcModel.Index); + if (PIPos != TI->ProcIndices.end()) { + HasVariants = true; + break; + } + } + if (HasVariants) { SCDesc.NumMicroOps = MCSchedClassDesc::VariantNumMicroOps; continue; } @@ -804,27 +882,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, } IdxVec Writes = SCI->Writes; IdxVec Reads = SCI->Reads; - if (SCI->ItinClassDef) { - assert(SCI->InstRWs.empty() && "ItinClass should not have InstRWs"); - // Check this processor's itinerary class resources. - for (RecIter II = ProcModel.ItinRWDefs.begin(), - IE = ProcModel.ItinRWDefs.end(); II != IE; ++II) { - RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses"); - if (std::find(Matched.begin(), Matched.end(), SCI->ItinClassDef) - != Matched.end()) { - SchedModels.findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), - Writes, Reads); - break; - } - } - if (Writes.empty()) { - DEBUG(dbgs() << ProcModel.ItinsDef->getName() - << " does not have resources for itinerary class " - << SCI->ItinClassDef->getName() << '\n'); - } - } - else if (!SCI->InstRWs.empty()) { - // This class may have a default ReadWrite list which can be overriden by + if (!SCI->InstRWs.empty()) { + // This class has a default ReadWrite list which can be overriden by // InstRW definitions. Record *RWDef = 0; for (RecIter RWI = SCI->InstRWs.begin(), RWE = SCI->InstRWs.end(); @@ -842,6 +901,23 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, Writes, Reads); } } + if (Writes.empty()) { + // Check this processor's itinerary class resources. + for (RecIter II = ProcModel.ItinRWDefs.begin(), + IE = ProcModel.ItinRWDefs.end(); II != IE; ++II) { + RecVec Matched = (*II)->getValueAsListOfDefs("MatchedItinClasses"); + if (std::find(Matched.begin(), Matched.end(), SCI->ItinClassDef) + != Matched.end()) { + SchedModels.findRWs((*II)->getValueAsListOfDefs("OperandReadWrites"), + Writes, Reads); + break; + } + } + if (Writes.empty()) { + DEBUG(dbgs() << ProcModel.ModelName + << " does not have resources for class " << SCI->Name << '\n'); + } + } // Sum resources across all operand writes. std::vector<MCWriteProcResEntry> WriteProcResources; std::vector<MCWriteLatencyEntry> WriteLatencies; @@ -859,7 +935,8 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, WriterNames.push_back(SchedModels.getSchedWrite(WriteID).Name); // If this Write is not referenced by a ReadAdvance, don't distinguish it // from other WriteLatency entries. - if (!SchedModels.hasReadOfWrite(SchedModels.getSchedWrite(WriteID).TheDef)) { + if (!SchedModels.hasReadOfWrite( + SchedModels.getSchedWrite(WriteID).TheDef)) { WriteID = 0; } WLEntry.WriteResourceID = WriteID; @@ -884,15 +961,15 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel, RecVec PRVec = WriteRes->getValueAsListOfDefs("ProcResources"); std::vector<int64_t> Cycles = WriteRes->getValueAsListOfInts("ResourceCycles"); + + ExpandProcResources(PRVec, Cycles, ProcModel); + for (unsigned PRIdx = 0, PREnd = PRVec.size(); PRIdx != PREnd; ++PRIdx) { MCWriteProcResEntry WPREntry; WPREntry.ProcResourceIdx = ProcModel.getProcResourceIdx(PRVec[PRIdx]); assert(WPREntry.ProcResourceIdx && "Bad ProcResourceIdx"); - if (Cycles.size() > PRIdx) - WPREntry.Cycles = Cycles[PRIdx]; - else - WPREntry.Cycles = 1; + WPREntry.Cycles = Cycles[PRIdx]; // If this resource is already used in this sequence, add the current // entry's cycles so that the same resource appears to be used // serially, rather than multiple parallel uses. This is important for @@ -1075,7 +1152,7 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables, // The first class is always invalid. We no way to distinguish it except by // name and position. - assert(SchedModels.getSchedClass(0).Name == "NoItinerary" + assert(SchedModels.getSchedClass(0).Name == "NoInstrModel" && "invalid class not first"); OS << " {DBGFIELD(\"InvalidSchedClass\") " << MCSchedClassDesc::InvalidNumMicroOps @@ -1132,7 +1209,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) { - SchedModels.schedClassBegin()) << ",\n"; else OS << " 0, 0, 0, 0, // No instruction-level machine model.\n"; - if (SchedModels.hasItineraryClasses()) + if (SchedModels.hasItineraries()) OS << " " << PI->ItinsDef->getName() << ");\n"; else OS << " 0); // No Itinerary\n"; @@ -1189,7 +1266,7 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) { << "#define DBGFIELD(x)\n" << "#endif\n"; - if (SchedModels.hasItineraryClasses()) { + if (SchedModels.hasItineraries()) { std::vector<std::vector<InstrItinerary> > ProcItinLists; // Emit the stage data EmitStageAndOperandCycleData(OS, ProcItinLists); @@ -1230,7 +1307,7 @@ void SubtargetEmitter::EmitSchedModelHelpers(std::string ClassName, SCE = SchedModels.schedClassEnd(); SCI != SCE; ++SCI) { if (SCI->Transitions.empty()) continue; - VariantClasses.push_back(SCI - SchedModels.schedClassBegin()); + VariantClasses.push_back(SCI->Index); } if (!VariantClasses.empty()) { OS << " switch (SchedClass) {\n"; @@ -1277,13 +1354,8 @@ void SubtargetEmitter::EmitSchedModelHelpers(std::string ClassName, if (*PI == 0) break; } - unsigned SCIdx = 0; - if (SC.ItinClassDef) - SCIdx = SchedModels.getSchedClassIdxForItin(SC.ItinClassDef); - else - SCIdx = SchedModels.findSchedClassIdx(SC.Writes, SC.Reads); - if (SCIdx != *VCI) - OS << " return " << SCIdx << ";\n"; + if (SC.isInferred()) + OS << " return " << SC.Index << ";\n"; OS << " break;\n"; } OS << " };\n"; @@ -1389,7 +1461,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { << Target << "WriteProcResTable, " << Target << "WriteLatencyTable, " << Target << "ReadAdvanceTable, "; - if (SchedModels.hasItineraryClasses()) { + if (SchedModels.hasItineraries()) { OS << '\n'; OS.indent(22); OS << Target << "Stages, " << Target << "OperandCycles, " @@ -1446,7 +1518,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { OS << "extern const llvm::MCReadAdvanceEntry " << Target << "ReadAdvanceTable[];\n"; - if (SchedModels.hasItineraryClasses()) { + if (SchedModels.hasItineraries()) { OS << "extern const llvm::InstrStage " << Target << "Stages[];\n"; OS << "extern const unsigned " << Target << "OperandCycles[];\n"; OS << "extern const unsigned " << Target << "ForwardingPaths[];\n"; @@ -1470,7 +1542,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { << Target << "WriteLatencyTable, " << Target << "ReadAdvanceTable, "; OS << '\n'; OS.indent(22); - if (SchedModels.hasItineraryClasses()) { + if (SchedModels.hasItineraries()) { OS << Target << "Stages, " << Target << "OperandCycles, " << Target << "ForwardingPaths, "; diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp index 510f2548cd..b5c3ca760d 100644 --- a/utils/TableGen/TableGen.cpp +++ b/utils/TableGen/TableGen.cpp @@ -40,7 +40,8 @@ enum ActionType { GenTgtIntrinsic, PrintEnums, PrintSets, - GenOptParserDefs + GenOptParserDefs, + GenCTags }; namespace { @@ -82,6 +83,8 @@ namespace { "Print expanded sets for testing DAG exprs"), clEnumValN(GenOptParserDefs, "gen-opt-parser-defs", "Generate option definitions"), + clEnumValN(GenCTags, "gen-ctags", + "Generate ctags-compatible index"), clEnumValEnd)); cl::opt<std::string> @@ -161,6 +164,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { } break; } + case GenCTags: + EmitCTags(Records, OS); + break; } return false; diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h index cc99092658..28b626e17e 100644 --- a/utils/TableGen/TableGenBackends.h +++ b/utils/TableGen/TableGenBackends.h @@ -75,5 +75,6 @@ void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS); void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS); void EmitMapTable(RecordKeeper &RK, raw_ostream &OS); void EmitOptParser(RecordKeeper &RK, raw_ostream &OS); +void EmitCTags(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index 8471faaaf6..61b9813b06 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -37,14 +37,15 @@ using namespace llvm; MAP(D1, 46) \ MAP(D4, 47) \ MAP(D5, 48) \ - MAP(D8, 49) \ - MAP(D9, 50) \ - MAP(DA, 51) \ - MAP(DB, 52) \ - MAP(DC, 53) \ - MAP(DD, 54) \ - MAP(DE, 55) \ - MAP(DF, 56) + MAP(D6, 49) \ + MAP(D8, 50) \ + MAP(D9, 51) \ + MAP(DA, 52) \ + MAP(DB, 53) \ + MAP(DC, 54) \ + MAP(DD, 55) \ + MAP(DE, 56) \ + MAP(DF, 57) // A clone of X86 since we can't depend on something that is generated. namespace X86Local { diff --git a/utils/TableGen/tdtags b/utils/TableGen/tdtags new file mode 100644 index 0000000000..5214485f4e --- /dev/null +++ b/utils/TableGen/tdtags @@ -0,0 +1,453 @@ +#!/bin/sh +#===-- tdtags - TableGen tags wrapper ---------------------------*- sh -*-===# +# vim:set sts=2 sw=2 et: +#===----------------------------------------------------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===----------------------------------------------------------------------===# +# +# This is a wrapper script to simplify generating ctags(1)-compatible index +# files for target .td files. Run tdtags -H for more documentation. +# +# For portability, this script is intended to conform to IEEE Std 1003.1-2008. +# +#===----------------------------------------------------------------------===# + +SELF=${0##*/} + +usage() { +cat <<END +Usage: $SELF [ <options> ] tdfile + or: $SELF [ <options> ] -x recipe [arg ...] +OPTIONS + -H Display further help. + -a Append the tags to an existing tags file. + -f <file> Write tags to the specified file (defaults to 'tags'). + -I <dir> Add the directory to the search path for tblgen include files. + -x <recipe> Generate tags file(s) for a common use case: + -q Suppress $TBLGEN error messages. + -v Be verbose; report progress. +END + usage_recipes +} + +usage_recipes() { +cat <<END + all - Generate an index in each directory that contains .td files + in the LLVM source tree. + here - Generate an index for all .td files in the current directory. + recurse - Generate an index in each directory that contains .td files + in and under the current directory. + target [<target> ...] + - Generate a tags file for each specified LLVM code generator + target, or if none are specified, all targets. +END +} + +help() { +cat <<END +NAME + $SELF - generate ctags(1)-compatible index files for tblgen .td source + +SYNOPSIS + $SELF [ options ] -x recipe [arg ...] + $SELF [ options ] [file ...] + +DESCRIPTION + With the '-x' option, $SELF produces one or more tags files for a + particular common use case. See the RECIPES section below for details. + + Without the '-x' option, $SELF provides a ctags(1)-like interface to + $TBLGEN. + +OPTIONS + -a Append newly generated tags to those already in an existing + tags file. Without ths option, any and all existing tags are + replaced. NOTE: When building a mixed tags file, using ${SELF} + for tblgen tags and ctags(1) for other languages, it is best + to run ${SELF} first without '-a', and ctags(1) second with '-a', + because ctags(1) handling is more capable. + -f <file> Use the name <file> for the tags file, rather than the default + "tags". If the <file> is "-", then the tag index is written to + standard output. + -H Display this document. + -I <dir> Add the directory <dir> to the search path for 'include' + statements in tblgen source. + -x Run a canned recipe, rather than operate on specified files. + When '-x' is present, the first non-option argument is the + name of a recipe, and any further arguments are arguments to + that recipe. With no arguments, lists the available recipes. + -q Suppress $TBLGEN error messages. Not all .td files are well- + formed outside a specific context, so recipes will sometimes + produce error messages for certain .td files. These errors + do not affect the indices produced for valid files. + -v Be verbose; report progress. + +RECIPES + $SELF -x all + Produce a tags file in every directory in the LLVM source tree + that contains any .td files. + $SELF -x here + Produce a tags file from .td files in the current directory. + $SELF -x recurse + Produce a tags file in every directory that contains any .td + files, in and under the current directory. + $SELF -x target [<target> ...] + Produce a tags file for each named code generator target, or + if none are named, for all code generator targets. +END +} + +# Temporary file management. +# +# Since SUS sh(1) has no arrays, this script makes extensive use of +# temporary files. The follow are 'global' and used to carry information +# across functions: +# $TMP:D Include directories. +# $TMP:I Included files. +# $TMP:T Top-level files, that are not included by another. +# $TMP:W Directories in which to generate tags (Worklist). +# For portability to OS X, names must not differ only in case. +# +TMP=${TMPDIR:-/tmp}/$SELF:$$ +trap "rm -f $TMP*" 0 +trap exit 1 2 13 15 +>$TMP:D + +td_dump() +{ + if [ $OPT_VERBOSE -gt 1 ] + then + printf '===== %s =====\n' "$1" + cat <"$1" + fi +} + +# Escape the arguments, taken as a whole. +e() { + printf '%s' "$*" | + sed -e "s/'/'\\\\''/g" -e "1s/^/'/" -e "\$s/\$/'/" +} + +# Determine whether the given directory contains at least one .td file. +dir_has_td() { + for i in $1/*.td + do + [ -f "$i" ] && return 0 + done + return 1 +} + +# Partition the supplied list of files, plus any files included from them, +# into two groups: +# $TMP:T Top-level files, that are not included by another. +# $TMP:I Included files. +# Add standard directories to the include paths in $TMP:D if this would +# benefit the any of the included files. +td_prep() { + >$TMP:E + >$TMP:J + for i in *.td + do + [ "x$i" = 'x*.td' ] && return 1 + if [ -f "$i" ] + then + printf '%s\n' "$i" >>$TMP:E + sed -n -e 's/include[[:space:]]"\(.*\)".*/\1/p' <"$i" >>$TMP:J + else + printf >&2 '%s: "%s" not found.\n' "$SELF" "$i" + exit 7 + fi + done + sort -u <$TMP:E >$TMP:X + sort -u <$TMP:J >$TMP:I + # A file that exists but is not included is toplevel. + comm -23 $TMP:X $TMP:I >$TMP:T + td_dump $TMP:T + td_dump $TMP:I + # Check include files. + while read i + do + [ -f "$i" ] && continue + while read d + do + [ -f "$d/$i" ] && break + done <$TMP:D + if [ -z "$d" ] + then + # See whether this include file can be found in a common location. + for d in $LLVM_SRC_ROOT/include \ + $LLVM_SRC_ROOT/tools/clang/include + do + if [ -f "$d/$i" ] + then + printf '%s\n' "$d" >>$TMP:D + break + fi + done + fi + done <$TMP:I + td_dump $TMP:D +} + +# Generate tags for the list of files in $TMP:T. +td_tag() { + # Collect include directories. + inc= + while read d + do + inc="${inc}${inc:+ }$(e "-I=$d")" + done <$TMP:D + + if [ $OPT_VERBOSE -ne 0 ] + then + printf >&2 'In "%s",\n' "$PWD" + fi + + # Generate tags for each file. + n=0 + while read i + do + if [ $OPT_VERBOSE -ne 0 ] + then + printf >&2 ' generating tags from "%s"\n' "$i" + fi + n=$((n + 1)) + t=$(printf '%s:A:%05u' "$TMP" $n) + eval $TBLGEN --gen-ctags $inc "$i" >$t 2>$TMP:F + [ $OPT_NOTBLGENERR -eq 1 ] || cat $TMP:F + done <$TMP:T + + # Add existing tags if requested. + if [ $OPT_APPEND -eq 1 -a -f "$OPT_TAGSFILE" ] + then + if [ $OPT_VERBOSE -ne 0 ] + then + printf >&2 ' and existing tags from "%s"\n' "$OPT_TAGSFILE" + fi + n=$((n + 1)) + t=$(printf '%s:A:%05u' "$TMP" $n) + sed -e '/^!_TAG_/d' <"$OPT_TAGSFILE" | sort -u >$t + fi + + # Merge tags. + if [ $n = 1 ] + then + mv -f "$t" $TMP:M + else + sort -m -u $TMP:A:* >$TMP:M + fi + + # Emit tags. + if [ x${OPT_TAGSFILE}x = x-x ] + then + cat $TMP:M + else + if [ $OPT_VERBOSE -ne 0 ] + then + printf >&2 ' into "%s".\n' "$OPT_TAGSFILE" + fi + mv -f $TMP:M "$OPT_TAGSFILE" + fi +} + +# Generate tags for the current directory. +td_here() { + td_prep + [ -s $TMP:T ] || return 1 + td_tag +} + +# Generate tags for the current directory, and report an error if there are +# no .td files present. +do_here() +{ + if ! td_here + then + printf >&2 '%s: Nothing to do here.\n' "$SELF" + exit 1 + fi +} + +# Generate tags for all .td files under the current directory. +do_recurse() +{ + td_find "$PWD" + td_dirs +} + +# Generate tags for all .td files in LLVM. +do_all() +{ + td_find "$LLVM_SRC_ROOT" + td_dirs +} + +# Generate tags for each directory in the worklist $TMP:W. +td_dirs() +{ + while read d + do + (cd "$d" && td_here) + done <$TMP:W +} + +# Find directories containing .td files within the specified directory, +# and record them in the worklist $TMP:W. +td_find() +{ + find -L "$1" -type f -name '*.td' | + sed -e 's:/[^/]*$::' | + sort -u >$TMP:W + td_dump $TMP:W +} + +# Generate tags for the specified code generator targets, or +# if there are no arguments, all targets. +do_targets() { + cd $LLVM_SRC_ROOT/lib/Target + if [ -z "$*" ] + then + td_find "$PWD" + else + # Check that every specified argument is a target directory; + # if not, list all target directories. + for d + do + if [ -d "$d" ] && dir_has_td "$d" + then + printf '%s/%s\n' "$PWD" "$d" + else + printf >&2 '%s: "%s" is not a target. Targets are:\n' "$SELF" "$d" + for d in * + do + [ -d "$d" ] || continue + dir_has_td "$d" && printf >&2 ' %s\n' "$d" + done + exit 2 + fi + done >$TMP:W + fi + td_dirs +} + +# Change to the directory at the top of the enclosing LLVM source tree, +# if possible. +llvm_src_root() { + while [ "$PWD" != / ] + do + # Use this directory if multiple notable subdirectories are present. + [ -d include/llvm -a -d lib/Target ] && return 0 + cd .. + done + return 1 +} + +# Ensure sort(1) behaves consistently. +LC_ALL=C +export LC_ALL + +# Globals. +TBLGEN=llvm-tblgen +LLVM_SRC_ROOT= + +# Command options. +OPT_TAGSFILE=tags +OPT_RECIPES=0 +OPT_APPEND=0 +OPT_VERBOSE=0 +OPT_NOTBLGENERR=0 + +while getopts 'af:hxqvHI:' opt +do + case $opt in + a) + OPT_APPEND=1 + ;; + f) + OPT_TAGSFILE="$OPTARG" + ;; + x) + OPT_RECIPES=1 + ;; + q) + OPT_NOTBLGENERR=1 + ;; + v) + OPT_VERBOSE=$((OPT_VERBOSE + 1)) + ;; + I) + printf '%s\n' "$OPTARG" >>$TMP:D + ;; + [hH]) + help + exit 0 + ;; + *) + usage >&2 + exit 4 + ;; + esac +done +shift $((OPTIND - 1)) + +# Handle the case where tdtags is a simple ctags(1)-like wrapper for tblgen. +if [ $OPT_RECIPES -eq 0 ] +then + if [ -z "$*" ] + then + help >&2 + exit 5 + fi + for i + do + printf '%s\n' "$i" + done >$TMP:T + td_tag + exit $? +fi + +# Find the directory at the top of the enclosing LLVM source tree. +if ! LLVM_SRC_ROOT=$(llvm_src_root && pwd) +then + printf >&2 '%s: Run from within the LLVM source tree.\n' "$SELF" + exit 3 +fi + +# Select canned actions. +RECIPE="$1" +case "$RECIPE" in +all) + shift + do_all + ;; +.|cwd|here) + shift + do_here + ;; +recurse) + shift + do_recurse + ;; +target) + shift + do_targets "$@" + ;; +*) + if [ -n "$RECIPE" ] + then + shift + printf >&2 '%s: Unknown recipe "-x %s". ' "$SELF" "$RECIPE" + fi + printf >&2 'Recipes:\n' + usage_recipes >&2 + printf >&2 'Run "%s -H" for help.\n' "$SELF" + exit 6 + ;; +esac + +exit $? diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py index a272976ca0..26541f183b 100644 --- a/utils/lit/lit/TestFormats.py +++ b/utils/lit/lit/TestFormats.py @@ -97,6 +97,9 @@ class GoogleTest(object): if litConfig.useValgrind: cmd = litConfig.valgrindArgs + cmd + if litConfig.noExecute: + return Test.PASS, '' + out, err, exitCode = TestRunner.executeCommand( cmd, env=test.config.environment) diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py index 07fb43f840..84176996a8 100644 --- a/utils/lit/lit/TestRunner.py +++ b/utils/lit/lit/TestRunner.py @@ -277,7 +277,10 @@ def executeScript(test, litConfig, tmpBase, commands, cwd): script += '.bat' # Write script file - f = open(script,'w') + mode = 'w' + if litConfig.isWindows and not isWin32CMDEXE: + mode += 'b' # Avoid CRLFs when writing bash scripts. + f = open(script, mode) if isWin32CMDEXE: f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands)) else: |